From 11c624e488663f4f7554d1f92a072c7caee3908e Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 21 Jan 2019 16:59:10 +0100 Subject: Refactor stdsimd This commit: * renames `coresimd` to `core_arch` and `stdsimd` to `std_detect` * `std_detect` does no longer depend on `core_arch` - it is a freestanding `no_std` library that only depends on `core` - it is renamed to `std_detect` * moves the top-level coresimd and stdsimd directories into the appropriate crates/... directories - this simplifies creating crate.io releases of these crates * moves the top-level `coresimd` and `stdsimd` sub-directories into their corresponding crates in `crates/{core_arch, std_detect}`. --- .../crates/std_detect/src/detect/arch/aarch64.rs | 103 ++++++ .../crates/std_detect/src/detect/arch/arm.rs | 36 +++ .../crates/std_detect/src/detect/arch/mips.rs | 26 ++ .../crates/std_detect/src/detect/arch/mips64.rs | 26 ++ .../crates/std_detect/src/detect/arch/powerpc.rs | 39 +++ .../crates/std_detect/src/detect/arch/powerpc64.rs | 39 +++ .../crates/std_detect/src/detect/arch/x86.rs | 331 +++++++++++++++++++ .../stdarch/crates/std_detect/src/detect/bit.rs | 9 + .../stdarch/crates/std_detect/src/detect/cache.rs | 162 ++++++++++ .../crates/std_detect/src/detect/error_macros.rs | 150 +++++++++ .../stdarch/crates/std_detect/src/detect/mod.rs | 85 +++++ .../crates/std_detect/src/detect/os/aarch64.rs | 79 +++++ .../std_detect/src/detect/os/freebsd/aarch64.rs | 28 ++ .../crates/std_detect/src/detect/os/freebsd/mod.rs | 14 + .../std_detect/src/detect/os/linux/aarch64.rs | 157 +++++++++ .../crates/std_detect/src/detect/os/linux/arm.rs | 49 +++ .../std_detect/src/detect/os/linux/auxvec.rs | 270 ++++++++++++++++ .../std_detect/src/detect/os/linux/cpuinfo.rs | 301 +++++++++++++++++ .../crates/std_detect/src/detect/os/linux/mips.rs | 31 ++ .../crates/std_detect/src/detect/os/linux/mod.rs | 26 ++ .../std_detect/src/detect/os/linux/powerpc.rs | 41 +++ .../crates/std_detect/src/detect/os/other.rs | 9 + .../stdarch/crates/std_detect/src/detect/os/x86.rs | 357 +++++++++++++++++++++ .../src/detect/test_data/linux-rpi3.auxv | Bin 0 -> 160 bytes .../src/detect/test_data/linux-x64-i7-6850k.auxv | Bin 0 -> 304 bytes .../macos-virtualbox-linux-x86-4850HQ.auxv | Bin 0 -> 160 bytes library/stdarch/crates/std_detect/src/lib.rs | 37 +++ library/stdarch/crates/std_detect/src/mod.rs | 5 + 28 files changed, 2410 insertions(+) create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/arm.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/mips.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/mips64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/x86.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/bit.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/cache.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/error_macros.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/mod.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs create mode 100644 
library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/other.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/x86.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv create mode 100644 library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv create mode 100644 library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv create mode 100644 library/stdarch/crates/std_detect/src/lib.rs create mode 100644 library/stdarch/crates/std_detect/src/mod.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs new file mode 100644 index 00000000000..882c22cc174 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -0,0 +1,103 @@ +//! Aarch64 run-time features. + +/// Checks if `aarch64` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_aarch64_feature_detected { + ("neon") => { + // FIXME: this should be removed once we rename Aarch64 neon to asimd + cfg!(target_feature = "neon") || + $crate::detect::check_for($crate::detect::Feature::asimd) + }; + ("asimd") => { + cfg!(target_feature = "neon") || + $crate::detect::check_for($crate::detect::Feature::asimd) + }; + ("pmull") => { + cfg!(target_feature = "pmull") || + $crate::detect::check_for($crate::detect::Feature::pmull) + }; + ("fp") => { + cfg!(target_feature = "fp") || + $crate::detect::check_for($crate::detect::Feature::fp) + }; + ("fp16") => { + cfg!(target_feature = "fp16") || + $crate::detect::check_for($crate::detect::Feature::fp16) + }; + ("sve") => { + cfg!(target_feature = "sve") || + $crate::detect::check_for($crate::detect::Feature::sve) + }; + ("crc") => { + cfg!(target_feature = "crc") || + $crate::detect::check_for($crate::detect::Feature::crc) + }; + ("crypto") => { + cfg!(target_feature = "crypto") || + $crate::detect::check_for($crate::detect::Feature::crypto) + }; + ("lse") => { + cfg!(target_feature = "lse") || + $crate::detect::check_for($crate::detect::Feature::lse) + }; + ("rdm") => { + cfg!(target_feature = "rdm") || + $crate::detect::check_for($crate::detect::Feature::rdm) + }; + ("rcpc") => { + cfg!(target_feature = "rcpc") || + $crate::detect::check_for($crate::detect::Feature::rcpc) + }; + ("dotprod") => { + cfg!(target_feature = "dotprod") || + $crate::detect::check_for($crate::detect::Feature::dotprod) + }; + ("ras") => { + compile_error!("\"ras\" feature cannot be detected at run-time") + }; + ("v8.1a") => { + compile_error!("\"v8.1a\" feature cannot be detected at run-time") + }; + ("v8.2a") => { + compile_error!("\"v8.2a\" feature cannot be detected at run-time") + }; + ("v8.3a") => { + compile_error!("\"v8.3a\" feature cannot be detected at run-time") + }; + ($t:tt) => { 
compile_error!(concat!("unknown aarch64 target feature: ", $t)) }; +} + +/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// ARM Advanced SIMD (ASIMD) + asimd, + /// Polynomial Multiply + pmull, + /// Floating point support + fp, + /// Half-float support. + fp16, + /// Scalable Vector Extension (SVE) + sve, + /// CRC32 (Cyclic Redundancy Check) + crc, + /// Crypto: AES + PMULL + SHA1 + SHA2 + crypto, + /// Atomics (Large System Extension) + lse, + /// Rounding Double Multiply (ASIMDRDM) + rdm, + /// Release consistent Processor consistent (RcPc) + rcpc, + /// Vector Dot-Product (ASIMDDP) + dotprod, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs new file mode 100644 index 00000000000..cb6ac6badcc --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection on ARM Aarch32. + +/// Checks if `arm` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_arm_feature_detected { + ("neon") => { + cfg!(target_feature = "neon") || + $crate::detect::check_for($crate::detect::Feature::neon) + }; + ("pmull") => { + cfg!(target_feature = "pmull") || + $crate::detect::check_for($crate::detect::Feature::pmull) + }; + ("v7") => { compile_error!("\"v7\" feature cannot be detected at run-time") }; + ("vfp2") => { compile_error!("\"vfp2\" feature cannot be detected at run-time") }; + ("vfp3") => { compile_error!("\"vfp3\" feature cannot be detected at run-time") }; + ("vfp4") => { compile_error!("\"vfp4\" feature cannot be detected at run-time") }; + ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) }; +} + +/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// ARM Advanced SIMD (NEON) - Aarch32 + neon, + /// Polynomial Multiply + pmull, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs new file mode 100644 index 00000000000..876f8dde262 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -0,0 +1,26 @@ +//! Run-time feature detection on MIPS. + +/// Checks if `mips` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_mips_feature_detected { + ("msa") => { + cfg!(target_feature = "msa") || + $crate::detect::check_for($crate::detect::Feature::msa) + }; + ($t:tt) => { compile_error!(concat!("unknown mips target feature: ", $t)) }; +} + +/// MIPS CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. 
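(Editorial aside, not part of this patch.) The per-architecture macros above are meant for run-time dispatch. A minimal sketch for the MIPS case, where `msa_impl` and `scalar_impl` are hypothetical functions standing in for an MSA-accelerated routine and its portable fallback:

```rust
// Hypothetical dispatch using the macro defined above (sketch only).
#[cfg(target_arch = "mips")]
fn sum(data: &[i32]) -> i32 {
    if is_mips_feature_detected!("msa") {
        // `msa_impl` would be built with #[target_feature(enable = "msa")],
        // so calling it requires `unsafe`.
        unsafe { msa_impl(data) }
    } else {
        scalar_impl(data)
    }
}
```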
+#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// MIPS SIMD Architecture (MSA) + msa, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs new file mode 100644 index 00000000000..ab837b3d5c9 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -0,0 +1,26 @@ +//! Run-time feature detection on MIPS64. + +/// Checks if `mips64` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_mips64_feature_detected { + ("msa") => { + cfg!(target_feature = "msa") || + $crate::detect::check_for($crate::detect::Feature::msa) + }; + ($t:tt) => { compile_error!(concat!("unknown mips64 target feature: ", $t)) }; +} + +/// MIPS64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// MIPS SIMD Architecture (MSA) + msa, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs new file mode 100644 index 00000000000..9c440b1d6b0 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -0,0 +1,39 @@ +//! Run-time feature detection on PowerPC. + +/// Checks if `powerpc` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_powerpc_feature_detected { + ("altivec") => { + cfg!(target_feature = "altivec") || + $crate::detect::check_for($crate::detect::Feature::altivec) + }; + ("vsx") => { + cfg!(target_feature = "vsx") || + $crate::detect::check_for($crate::detect::Feature::vsx) + }; + ("power8") => { + cfg!(target_feature = "power8") || + $crate::detect::check_for($crate::detect::Feature::power8) + }; + ($t:tt) => { compile_error!(concat!("unknown powerpc target feature: ", $t)) }; +} + + +/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// Altivec + altivec, + /// VSX + vsx, + /// Power8 + power8, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs new file mode 100644 index 00000000000..910940f0bb9 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -0,0 +1,39 @@ +//! Run-time feature detection on PowerPC64. + +/// Checks if `powerpc64` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! 
is_powerpc64_feature_detected { + ("altivec") => { + cfg!(target_feature = "altivec") || + $crate::detect::check_for($crate::detect::Feature::altivec) + }; + ("vsx") => { + cfg!(target_feature = "vsx") || + $crate::detect::check_for($crate::detect::Feature::vsx) + }; + ("power8") => { + cfg!(target_feature = "power8") || + $crate::detect::check_for($crate::detect::Feature::power8) + }; + ($t:tt) => { compile_error!(concat!("unknown powerpc64 target feature: ", $t)) }; +} + + +/// PowerPC64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// Altivec + altivec, + /// VSX + vsx, + /// Power8 + power8, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs new file mode 100644 index 00000000000..3ef8d31d12b --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -0,0 +1,331 @@ +//! This module implements minimal run-time feature detection for x86. +//! +//! The features are detected using the `detect_features` function below. +//! This function uses the CPUID instruction to read the feature flags from the +//! CPU and encodes them in an `usize` where each bit position represents +//! whether a feature is available (bit is set) or unavaiable (bit is cleared). +//! +//! The enum `Feature` is used to map bit positions to feature names, and the +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g. +//! "avx") to these bit positions (e.g. `Feature::avx`). +//! +//! +//! The run-time feature detection is performed by the +//! `__crate::detect::check_for(Feature) -> bool` function. On its first call, +//! this functions queries the CPU for the available features and stores them +//! in a global `AtomicUsize` variable. The query is performed by just checking +//! whether the feature bit in this global variable is set or cleared. + +/// A macro to test at *runtime* whether a CPU feature is available on +/// x86/x86-64 platforms. +/// +/// This macro is provided in the standard library and will detect at runtime +/// whether the specified CPU feature is detected. This does *not* resolve at +/// compile time unless the specified feature is already enabled for the entire +/// crate. Runtime detection currently relies mostly on the `cpuid` instruction. +/// +/// This macro only takes one argument which is a string literal of the feature +/// being tested for. The feature names supported are the lowercase versions of +/// the ones defined by Intel in [their documentation][docs]. +/// +/// ## Supported arguments +/// +/// This macro supports the same names that `#[target_feature]` supports. Unlike +/// `#[target_feature]`, however, this macro does not support names separated +/// with a comma. Instead testing for multiple features must be done through +/// separate macro invocations for now. 
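(Editorial aside, not part of this patch.) "Separate macro invocations" looks like the following sketch; `add_avx2_fma` and `add_fallback` are hypothetical, with the former assumed to be compiled with `#[target_feature(enable = "avx2,fma")]`:

```rust
// Testing two features requires two invocations of the macro (sketch only).
fn add(a: &[f32], b: &[f32], out: &mut [f32]) {
    if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
        // Calling a #[target_feature] function is unsafe.
        unsafe { add_avx2_fma(a, b, out) }
    } else {
        add_fallback(a, b, out)
    }
}
```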
+/// +/// Supported arguments are: +/// +/// * `"aes"` +/// * `"pclmulqdq"` +/// * `"rdrand"` +/// * `"rdseed"` +/// * `"tsc"` +/// * `"mmx"` +/// * `"sse"` +/// * `"sse2"` +/// * `"sse3"` +/// * `"ssse3"` +/// * `"sse4.1"` +/// * `"sse4.2"` +/// * `"sse4a"` +/// * `"sha"` +/// * `"avx"` +/// * `"avx2"` +/// * `"avx512f"` +/// * `"avx512cd"` +/// * `"avx512er"` +/// * `"avx512pf"` +/// * `"avx512bw"` +/// * `"avx512dq"` +/// * `"avx512vl"` +/// * `"avx512ifma"` +/// * `"avx512vbmi"` +/// * `"avx512vpopcntdq"` +/// * `"fma"` +/// * `"bmi1"` +/// * `"bmi2"` +/// * `"abm"` +/// * `"lzcnt"` +/// * `"tbm"` +/// * `"popcnt"` +/// * `"fxsr"` +/// * `"xsave"` +/// * `"xsaveopt"` +/// * `"xsaves"` +/// * `"xsavec"` +/// +/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide +#[macro_export] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow_internal_unstable] +macro_rules! is_x86_feature_detected { + ("aes") => { + cfg!(target_feature = "aes") || $crate::detect::check_for( + $crate::detect::Feature::aes) }; + ("pclmulqdq") => { + cfg!(target_feature = "pclmulqdq") || $crate::detect::check_for( + $crate::detect::Feature::pclmulqdq) }; + ("rdrand") => { + cfg!(target_feature = "rdrand") || $crate::detect::check_for( + $crate::detect::Feature::rdrand) }; + ("rdseed") => { + cfg!(target_feature = "rdseed") || $crate::detect::check_for( + $crate::detect::Feature::rdseed) }; + ("tsc") => { + cfg!(target_feature = "tsc") || $crate::detect::check_for( + $crate::detect::Feature::tsc) }; + ("mmx") => { + cfg!(target_feature = "mmx") || $crate::detect::check_for( + $crate::detect::Feature::mmx) }; + ("sse") => { + cfg!(target_feature = "sse") || $crate::detect::check_for( + $crate::detect::Feature::sse) }; + ("sse2") => { + cfg!(target_feature = "sse2") || $crate::detect::check_for( + $crate::detect::Feature::sse2) + }; + ("sse3") => { + cfg!(target_feature = "sse3") || $crate::detect::check_for( + $crate::detect::Feature::sse3) + }; + ("ssse3") => { + cfg!(target_feature = "ssse3") || $crate::detect::check_for( + $crate::detect::Feature::ssse3) + }; + ("sse4.1") => { + cfg!(target_feature = "sse4.1") || $crate::detect::check_for( + $crate::detect::Feature::sse4_1) + }; + ("sse4.2") => { + cfg!(target_feature = "sse4.2") || $crate::detect::check_for( + $crate::detect::Feature::sse4_2) + }; + ("sse4a") => { + cfg!(target_feature = "sse4a") || $crate::detect::check_for( + $crate::detect::Feature::sse4a) + }; + ("sha") => { + cfg!(target_feature = "sha") || $crate::detect::check_for( + $crate::detect::Feature::sha) + }; + ("avx") => { + cfg!(target_feature = "avx") || $crate::detect::check_for( + $crate::detect::Feature::avx) + }; + ("avx2") => { + cfg!(target_feature = "avx2") || $crate::detect::check_for( + $crate::detect::Feature::avx2) + }; + ("avx512f") => { + cfg!(target_feature = "avx512f") || $crate::detect::check_for( + $crate::detect::Feature::avx512f) + }; + ("avx512cd") => { + cfg!(target_feature = "avx512cd") || $crate::detect::check_for( + $crate::detect::Feature::avx512cd) + }; + ("avx512er") => { + cfg!(target_feature = "avx512er") || $crate::detect::check_for( + $crate::detect::Feature::avx512er) + }; + ("avx512pf") => { + cfg!(target_feature = "avx512pf") || $crate::detect::check_for( + $crate::detect::Feature::avx512pf) + }; + ("avx512bw") => { + cfg!(target_feature = "avx512bw") || $crate::detect::check_for( + $crate::detect::Feature::avx512bw) + }; + ("avx512dq") => { + cfg!(target_feature = "avx512dq") || $crate::detect::check_for( + 
$crate::detect::Feature::avx512dq) + }; + ("avx512vl") => { + cfg!(target_Feature = "avx512vl") || $crate::detect::check_for( + $crate::detect::Feature::avx512vl) + }; + ("avx512ifma") => { + cfg!(target_feature = "avx512ifma") || $crate::detect::check_for( + $crate::detect::Feature::avx512_ifma) + }; + ("avx512vbmi") => { + cfg!(target_feature = "avx512vbmi") || $crate::detect::check_for( + $crate::detect::Feature::avx512_vbmi) + }; + ("avx512vpopcntdq") => { + cfg!(target_feature = "avx512vpopcntdq") || $crate::detect::check_for( + $crate::detect::Feature::avx512_vpopcntdq) + }; + ("fma") => { + cfg!(target_feature = "fma") || $crate::detect::check_for( + $crate::detect::Feature::fma) + }; + ("bmi1") => { + cfg!(target_feature = "bmi1") || $crate::detect::check_for( + $crate::detect::Feature::bmi) + }; + ("bmi2") => { + cfg!(target_feature = "bmi2") || $crate::detect::check_for( + $crate::detect::Feature::bmi2) + }; + ("abm") => { + cfg!(target_feature = "abm") || $crate::detect::check_for( + $crate::detect::Feature::abm) + }; + ("lzcnt") => { + cfg!(target_feature = "lzcnt") || $crate::detect::check_for( + $crate::detect::Feature::abm) + }; + ("tbm") => { + cfg!(target_feature = "tbm") || $crate::detect::check_for( + $crate::detect::Feature::tbm) + }; + ("popcnt") => { + cfg!(target_feature = "popcnt") || $crate::detect::check_for( + $crate::detect::Feature::popcnt) + }; + ("fxsr") => { + cfg!(target_feature = "fxsr") || $crate::detect::check_for( + $crate::detect::Feature::fxsr) + }; + ("xsave") => { + cfg!(target_feature = "xsave") || $crate::detect::check_for( + $crate::detect::Feature::xsave) + }; + ("xsaveopt") => { + cfg!(target_feature = "xsaveopt") || $crate::detect::check_for( + $crate::detect::Feature::xsaveopt) + }; + ("xsaves") => { + cfg!(target_feature = "xsaves") || $crate::detect::check_for( + $crate::detect::Feature::xsaves) + }; + ("xsavec") => { + cfg!(target_feature = "xsavec") || $crate::detect::check_for( + $crate::detect::Feature::xsavec) + }; + ("cmpxchg16b") => { + cfg!(target_feature = "cmpxchg16b") || $crate::detect::check_for( + $crate::detect::Feature::cmpxchg16b) + }; + ("adx") => { + cfg!(target_feature = "adx") || $crate::detect::check_for( + $crate::detect::Feature::adx) + }; + ($t:tt) => { + compile_error!(concat!("unknown target feature: ", $t)) + }; +} + +/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// This is an unstable implementation detail subject to change. 
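(Editorial aside, not part of this patch.) Each arm of the macro above reduces to a compile-time `cfg!` check or a run-time lookup of a single bit, with the `Feature` discriminant below used as the bit index; conceptually:

```rust
// Conceptual sketch: `cached_bits` stands in for the global detection cache.
fn feature_is_set(cached_bits: u64, feature: Feature) -> bool {
    cached_bits & (1u64 << (feature as u32)) != 0
}
```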
+#[allow(non_camel_case_types)] +#[repr(u8)] +#[doc(hidden)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// AES (Advanced Encryption Standard New Instructions AES-NI) + aes, + /// CLMUL (Carry-less Multiplication) + pclmulqdq, + /// RDRAND + rdrand, + /// RDSEED + rdseed, + /// TSC (Time Stamp Counter) + tsc, + /// MMX + mmx, + /// SSE (Streaming SIMD Extensions) + sse, + /// SSE2 (Streaming SIMD Extensions 2) + sse2, + /// SSE3 (Streaming SIMD Extensions 3) + sse3, + /// SSSE3 (Supplemental Streaming SIMD Extensions 3) + ssse3, + /// SSE4.1 (Streaming SIMD Extensions 4.1) + sse4_1, + /// SSE4.2 (Streaming SIMD Extensions 4.2) + sse4_2, + /// SSE4a (Streaming SIMD Extensions 4a) + sse4a, + /// SHA + sha, + /// AVX (Advanced Vector Extensions) + avx, + /// AVX2 (Advanced Vector Extensions 2) + avx2, + /// AVX-512 F (Foundation) + avx512f, + /// AVX-512 CD (Conflict Detection Instructions) + avx512cd, + /// AVX-512 ER (Exponential and Reciprocal Instructions) + avx512er, + /// AVX-512 PF (Prefetch Instructions) + avx512pf, + /// AVX-512 BW (Byte and Word Instructions) + avx512bw, + /// AVX-512 DQ (Doubleword and Quadword) + avx512dq, + /// AVX-512 VL (Vector Length Extensions) + avx512vl, + /// AVX-512 IFMA (Integer Fused Multiply Add) + avx512_ifma, + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + avx512_vbmi, + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and + /// Quadword) + avx512_vpopcntdq, + /// FMA (Fused Multiply Add) + fma, + /// BMI1 (Bit Manipulation Instructions 1) + bmi, + /// BMI1 (Bit Manipulation Instructions 2) + bmi2, + /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero + /// Count) on Intel + abm, + /// TBM (Trailing Bit Manipulation) + tbm, + /// POPCNT (Population Count) + popcnt, + /// FXSR (Floating-point context fast save and restor) + fxsr, + /// XSAVE (Save Processor Extended States) + xsave, + /// XSAVEOPT (Save Processor Extended States Optimized) + xsaveopt, + /// XSAVES (Save Processor Extended States Supervisor) + xsaves, + /// XSAVEC (Save Processor Extended States Compacted) + xsavec, + /// CMPXCH16B, a 16-byte compare-and-swap instruction + cmpxchg16b, + /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + adx, +} diff --git a/library/stdarch/crates/std_detect/src/detect/bit.rs b/library/stdarch/crates/std_detect/src/detect/bit.rs new file mode 100644 index 00000000000..578f0b16b74 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/bit.rs @@ -0,0 +1,9 @@ +//! Bit manipulation utilities. + +/// Tests the `bit` of `x`. +#[allow(dead_code)] +#[inline] +pub(crate) fn test(x: usize, bit: u32) -> bool { + debug_assert!(bit < 32, "bit index out-of-bounds"); + x & (1 << bit) != 0 +} diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs new file mode 100644 index 00000000000..c2de4da7349 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -0,0 +1,162 @@ +//! Caches run-time feature detection so that it only needs to be computed +//! once. + +#![allow(dead_code)] // not used on all platforms + +use core::sync::atomic::Ordering; + +#[cfg(target_pointer_width = "64")] +use core::sync::atomic::AtomicU64; + +#[cfg(target_pointer_width = "32")] +use core::sync::atomic::AtomicU32; + +/// Sets the `bit` of `x`. +#[inline] +const fn set_bit(x: u64, bit: u32) -> u64 { + x | 1 << bit +} + +/// Tests the `bit` of `x`. 
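(Editorial aside, not part of this patch.) The caching scheme implemented below boils down to "initialize once, then test bits", with the all-ones value doubling as the "not yet initialized" sentinel; that sentinel is also why only `CACHE_CAPACITY = 63` feature bits are usable. A stand-alone sketch, where `DETECTED` and `test_cached` are hypothetical stand-ins for the real `CACHE` and `cache::test`:

```rust
use core::sync::atomic::{AtomicU64, Ordering};

// All bits set means "not initialized yet" (sketch of the real CACHE below).
static DETECTED: AtomicU64 = AtomicU64::new(u64::max_value());

fn test_cached(bit: u32, detect: impl FnOnce() -> u64) -> bool {
    if DETECTED.load(Ordering::Relaxed) == u64::max_value() {
        // First call: run the detector and publish its bitset.
        DETECTED.store(detect(), Ordering::Relaxed);
    }
    DETECTED.load(Ordering::Relaxed) & (1 << bit) != 0
}
```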
+#[inline] +const fn test_bit(x: u64, bit: u32) -> bool { + x & (1 << bit) != 0 +} + +/// Maximum number of features that can be cached. +const CACHE_CAPACITY: u32 = 63; + +/// This type is used to initialize the cache +#[derive(Copy, Clone)] +pub(crate) struct Initializer(u64); + +impl Default for Initializer { + fn default() -> Self { + Initializer(0) + } +} + +impl Initializer { + /// Tests the `bit` of the cache. + #[allow(dead_code)] + #[inline] + pub(crate) fn test(self, bit: u32) -> bool { + // FIXME: this way of making sure that the cache is large enough is + // brittle. + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + test_bit(self.0, bit) + } + + /// Sets the `bit` of the cache. + #[inline] + pub(crate) fn set(&mut self, bit: u32) { + // FIXME: this way of making sure that the cache is large enough is + // brittle. + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + let v = self.0; + self.0 = set_bit(v, bit); + } +} + +/// This global variable is a cache of the features supported by the CPU. +static CACHE: Cache = Cache::uninitialized(); + +/// Feature cache with capacity for `CACHE_CAPACITY` features. +/// +/// Note: the last feature bit is used to represent an +/// uninitialized cache. +#[cfg(target_pointer_width = "64")] +struct Cache(AtomicU64); + +#[cfg(target_pointer_width = "64")] +impl Cache { + /// Creates an uninitialized cache. + const fn uninitialized() -> Self { + const X: AtomicU64 = AtomicU64::new(u64::max_value()); + Self(X) + } + /// Is the cache uninitialized? + #[inline] + pub(crate) fn is_uninitialized(&self) -> bool { + self.0.load(Ordering::Relaxed) == u64::max_value() + } + + /// Is the `bit` in the cache set? + #[inline] + pub(crate) fn test(&self, bit: u32) -> bool { + test_bit(CACHE.0.load(Ordering::Relaxed), bit) + } + + /// Initializes the cache. + #[inline] + pub(crate) fn initialize(&self, value: Initializer) { + self.0.store(value.0, Ordering::Relaxed); + } +} + +/// Feature cache with capacity for `CACHE_CAPACITY` features. +/// +/// Note: the last feature bit is used to represent an +/// uninitialized cache. +#[cfg(target_pointer_width = "32")] +struct Cache(AtomicU32, AtomicU32); + +#[cfg(target_pointer_width = "32")] +impl Cache { + /// Creates an uninitialized cache. + const fn uninitialized() -> Self { + Cache( + AtomicU32::new(u32::max_value()), + AtomicU32::new(u32::max_value()), + ) + } + /// Is the cache uninitialized? + #[inline] + pub(crate) fn is_uninitialized(&self) -> bool { + self.1.load(Ordering::Relaxed) == u32::max_value() + } + + /// Is the `bit` in the cache set? + #[inline] + pub(crate) fn test(&self, bit: u32) -> bool { + if bit < 32 { + test_bit(CACHE.0.load(Ordering::Relaxed) as u64, bit) + } else { + test_bit(CACHE.1.load(Ordering::Relaxed) as u64, bit - 32) + } + } + + /// Initializes the cache. + #[inline] + pub(crate) fn initialize(&self, value: Initializer) { + let lo: u32 = value.0 as u32; + let hi: u32 = (value.0 >> 32) as u32; + self.0.store(lo, Ordering::Relaxed); + self.1.store(hi, Ordering::Relaxed); + } +} + +/// Test the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `f()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `CACHE` global variable as an `AtomicU64`. +/// +/// It uses the `Feature` variant to index into this variable as a bitset. 
If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +#[inline] +pub(crate) fn test(bit: u32, f: F) -> bool +where + F: FnOnce() -> Initializer, +{ + if CACHE.is_uninitialized() { + CACHE.initialize(f()); + } + CACHE.test(bit) +} diff --git a/library/stdarch/crates/std_detect/src/detect/error_macros.rs b/library/stdarch/crates/std_detect/src/detect/error_macros.rs new file mode 100644 index 00000000000..6769757ed93 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/error_macros.rs @@ -0,0 +1,150 @@ +//! The `is_{target_arch}_feature_detected!` macro are only available on their +//! architecture. These macros provide a better error messages when the user +//! attempts to call them in a different architecture. + +/// Prevents compilation if `is_x86_feature_detected` is used somewhere +/// else than `x86` and `x86_64` targets. +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_x86_feature_detected { + ($t: tt) => { + compile_error!( + r#" + is_x86_feature_detected can only be used on x86 and x86_64 targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + if is_x86_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_arm_feature_detected` is used somewhere else +/// than `ARM` targets. +#[cfg(not(target_arch = "arm"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_arm_feature_detected { + ($t:tt) => { + compile_error!( + r#" + is_arm_feature_detected can only be used on ARM targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "arm")] { + if is_arm_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_aarch64_feature_detected` is used somewhere else +/// than `aarch64` targets. +#[cfg(not(target_arch = "aarch64"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_aarch64_feature_detected { + ($t: tt) => { + compile_error!( + r#" + is_aarch64_feature_detected can only be used on AArch64 targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "aarch64")] { + if is_aarch64_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_powerpc_feature_detected` is used somewhere else +/// than `PowerPC` targets. +#[cfg(not(target_arch = "powerpc"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_powerpc_feature_detected { + ($t:tt) => { + compile_error!( + r#" +is_powerpc_feature_detected can only be used on PowerPC targets. +You can prevent it from being used in other architectures by +guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "powerpc")] { + if is_powerpc_feature_detected(...) { ... } + } +"# + ) + }; +} + +/// Prevents compilation if `is_powerpc64_feature_detected` is used somewhere +/// else than `PowerPC64` targets. +#[cfg(not(target_arch = "powerpc64"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_powerpc64_feature_detected { + ($t:tt) => { + compile_error!( + r#" +is_powerpc64_feature_detected can only be used on PowerPC64 targets. 
+You can prevent it from being used in other architectures by +guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "powerpc64")] { + if is_powerpc64_feature_detected(...) { ... } + } +"# + ) + }; +} + +/// Prevents compilation if `is_mips_feature_detected` is used somewhere else +/// than `MIPS` targets. +#[cfg(not(target_arch = "mips"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_mips_feature_detected { + ($t:tt) => { + compile_error!( + r#" + is_mips_feature_detected can only be used on MIPS targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "mips")] { + if is_mips_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_mips64_feature_detected` is used somewhere else +/// than `MIPS64` targets. +#[cfg(not(target_arch = "mips64"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_mips64_feature_detected { + ($t:tt) => { + compile_error!( + r#" + is_mips64_feature_detected can only be used on MIPS64 targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "mips64")] { + if is_mips64_feature_detected(...) { ... } + } + "# + ) + }; +} diff --git a/library/stdarch/crates/std_detect/src/detect/mod.rs b/library/stdarch/crates/std_detect/src/detect/mod.rs new file mode 100644 index 00000000000..f446e88eedc --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/mod.rs @@ -0,0 +1,85 @@ +//! This module implements run-time feature detection. +//! +//! The `is_{arch}_feature_detected!("feature-name")` macros take the name of a +//! feature as a string-literal, and return a boolean indicating whether the +//! feature is enabled at run-time or not. +//! +//! These macros do two things: +//! * map the string-literal into an integer stored as a `Feature` enum, +//! * call a `os::check_for(x: Feature)` function that returns `true` if the +//! feature is enabled. +//! +//! The `Feature` enums are also implemented in the `arch/{target_arch}.rs` +//! modules. +//! +//! The `check_for` functions are, in general, Operating System dependent. Most +//! architectures do not allow user-space programs to query the feature bits +//! due to security concerns (x86 is the big exception). These functions are +//! implemented in the `os/{target_os}.rs` modules. + +#[macro_use] +mod error_macros; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[path = "arch/x86.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "arm")] { + #[path = "arch/arm.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "aarch64")] { + #[path = "arch/aarch64.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "powerpc")] { + #[path = "arch/powerpc.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "powerpc64")] { + #[path = "arch/powerpc64.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "mips")] { + #[path = "arch/mips.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "mips64")] { + #[path = "arch/mips64.rs"] + #[macro_use] + mod arch; + } else { + // Unimplemented architecture: + mod arch { + pub enum Feature { + Null + } + } + } +} +pub use self::arch::Feature; + +mod bit; +mod cache; + +cfg_if! 
{ + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // On x86/x86_64 no OS specific functionality is required. + #[path = "os/x86.rs"] + mod os; + } else if #[cfg(all(target_os = "linux", feature = "use_std"))] { + #[path = "os/linux/mod.rs"] + mod os; + } else if #[cfg(target_os = "freebsd")] { + #[cfg(target_arch = "aarch64")] + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/freebsd/mod.rs"] + mod os; + } else { + #[path = "os/other.rs"] + mod os; + } +} +pub use self::os::check_for; diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 00000000000..f28d15a7c3e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,79 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1[7:4] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! - [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) + +use crate::detect::{Feature, cache}; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. 
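(Editorial aside, not part of this patch.) As a worked example of the field encoding described above, and mirroring the `bits_shift` helper used below: the AES/PMULL field of `ID_AA64ISAR0_EL1` occupies bits [7:4], where a value >= 1 means AES is implemented and >= 2 additionally means PMULL:

```rust
// Extract bits [high:low] of a system-register value (same math as bits_shift).
fn field(x: u64, high: u32, low: u32) -> u64 {
    (x >> low) & ((1 << (high - low + 1)) - 1)
}

fn aes_and_pmull(id_aa64isar0_el1: u64) -> (bool, bool) {
    let f = field(id_aa64isar0_el1, 7, 4); // (x >> 4) & 0xF
    (f >= 1, f >= 2)
}
```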
+pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { asm!("mrs $0, ID_AA64ISAR0_EL1" : "=r"(aa64isar0)); } + + let aes = bits_shift(aa64isar0, 7, 4) >= 1; + let pmull = bits_shift(aa64isar0, 7, 4) >= 2; + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::pmull, pmull); + // Crypto is specified as AES + PMULL + SHA1 + SHA2 per LLVM/hosts.cpp + enable_feature(Feature::crypto, aes && pmull && sha1 && sha2); + enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 1); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { asm!("mrs $0, ID_AA64PFR0_EL1" : "=r"(aa64pfr0)); } + + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature(Feature::dotprod, asimd && bits_shift(aa64isar0, 47, 44) >= 1); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { asm!("mrs $0, ID_AA64ISAR1_EL1" : "=r"(aa64isar1)); } + + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + } + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 00000000000..910d2f33b39 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,28 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. + +use crate::detect::{Feature, cache}; +use super::super::aarch64::detect_features; + +/// Performs run-time feature detection. 
+#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +#[cfg(test)] +mod tests { + #[test] + fn dump() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("sve: {:?}", is_aarch64_feature_detected!("sve")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("crypto: {:?}", is_aarch64_feature_detected!("crypto")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 00000000000..1c73cefd47d --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,14 @@ +//! Run-time feature detection on FreeBSD + +cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub use self::aarch64::check_for; + } else { + use arch::detect::Feature; + /// Performs run-time feature detection. + pub fn check_for(_x: Feature) -> bool { + false + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..f7dc0f0222e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,157 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use crate::detect::{Feature, cache, bit}; +use super::{auxvec, cpuinfo}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +fn detect_features() -> cache::Initializer { + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(); + } + if let Ok(c) = cpuinfo::CpuInfo::new() { + let hwcap: AtHwcap = c.into(); + return hwcap.cache(); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +struct AtHwcap { + fp: bool, // 0 + asimd: bool, // 1 + // evtstrm: bool, // 2 + aes: bool, // 3 + pmull: bool, // 4 + sha1: bool, // 5 + sha2: bool, // 6 + crc32: bool, // 7 + atomics: bool, // 8 + fphp: bool, // 9 + asimdhp: bool, // 10 + // cpuid: bool, // 11 + asimdrdm: bool, // 12 + // jscvt: bool, // 13 + // fcma: bool, // 14 + lrcpc: bool, // 15 + // dcpop: bool, // 16 + // sha3: bool, // 17 + // sm3: bool, // 18 + // sm4: bool, // 19 + asimddp: bool, // 20 + // sha512: bool, // 21 + sve: bool, // 22 +} + +impl From for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. 
+ fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + // jscvt: bit::test(auxv.hwcap, 13), + // fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + // dcpop: bit::test(auxv.hwcap, 16), + // sha3: bit::test(auxv.hwcap, 17), + // sm3: bit::test(auxv.hwcap, 18), + // sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + // sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + } + } +} + +impl From for AtHwcap { + /// Reads AtHwcap from /proc/cpuinfo . + fn from(c: cpuinfo::CpuInfo) -> Self { + let f = &c.field("Features"); + AtHwcap { + // 64-bit names. FIXME: In 32-bit compatibility mode /proc/cpuinfo will + // map some of the 64-bit names to some 32-bit feature names. This does not + // cover that yet. + fp: f.has("fp"), + asimd: f.has("asimd"), + // evtstrm: f.has("evtstrm"), + aes: f.has("aes"), + pmull: f.has("pmull"), + sha1: f.has("sha1"), + sha2: f.has("sha2"), + crc32: f.has("crc32"), + atomics: f.has("atomics"), + fphp: f.has("fphp"), + asimdhp: f.has("asimdhp"), + // cpuid: f.has("cpuid"), + asimdrdm: f.has("asimdrdm"), + // jscvt: f.has("jscvt"), + // fcma: f.has("fcma"), + lrcpc: f.has("lrcpc"), + // dcpop: f.has("dcpop"), + // sha3: f.has("sha3"), + // sm3: f.has("sm3"), + // sm4: f.has("sm4"), + asimddp: f.has("asimddp"), + // sha512: f.has("sha512"), + sve: f.has("sve"), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The features are enabled approximately like in LLVM host feature detection: + /// https://github.com/llvm-mirror/llvm/blob/master/lib/Support/Host.cpp#L1273 + fn cache(self) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // SIMD extensions require SIMD support: + enable_feature(Feature::rdm, self.asimdrdm && asimd); + enable_feature(Feature::dotprod, self.asimddp && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // Crypto is specified as AES + PMULL + SHA1 + SHA2 per LLVM/hosts.cpp + enable_feature(Feature::crypto, self.aes && self.pmull && self.sha1 && self.sha2); + } + value + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 00000000000..0d58a847cd6 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,49 @@ +//! Run-time feature detection for ARM on Linux. 
+ +use crate::detect::{Feature, cache, bit}; +use super::{auxvec, cpuinfo}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + return value; + } + + if let Ok(c) = cpuinfo::CpuInfo::new() { + enable_feature(&mut value, Feature::neon, c.field("Features").has("neon") && + !has_broken_neon(&c)); + enable_feature(&mut value, Feature::pmull, c.field("Features").has("pmull")); + return value; + } + value +} + +/// Is the CPU known to have a broken NEON unit? +/// +/// See https://crbug.com/341598. +fn has_broken_neon(cpuinfo: &cpuinfo::CpuInfo) -> bool { + cpuinfo.field("CPU implementer") == "0x51" + && cpuinfo.field("CPU architecture") == "7" + && cpuinfo.field("CPU variant") == "0x1" + && cpuinfo.field("CPU part") == "0x04d" + && cpuinfo.field("CPU revision") == "0" +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 00000000000..31c980fd382 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,270 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr(not(target_arch = "aarch64"), allow(dead_code))] + +extern crate std; +use self::std::{prelude::v1::*, fs::File, io::Read}; + +use core::mem; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// There is no perfect way of reading the auxiliary vector. +/// +/// - If the `getauxval` is dynamically linked to this binary, it will be used. +/// - Otherwise, try to read `/proc/self/auxv`. +/// - If that fails, this function returns an error. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. 
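(Editorial aside, not part of this patch.) On Linux targets whose libc actually exports `getauxval`, the same HWCAP value can be read directly through the `libc` crate instead of going through `dlsym`; `hwcap_via_libc` is a hypothetical helper, and the availability of `libc::getauxval`/`libc::AT_HWCAP` is an assumption about the target's libc:

```rust
// Sketch: getauxval returns 0 when the requested key is absent.
#[cfg(target_os = "linux")]
fn hwcap_via_libc() -> Option<usize> {
    let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
    if hwcap == 0 { None } else { Some(hwcap as usize) }
}
```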
Also note that if this function returns an +/// error, cpuinfo still can (and will) be used to try to perform run-time +/// feature detecton on some platforms. +/// +/// For more information about when `getauxval` is available check the great +/// [`auxv` crate documentation][auxv_docs]. +/// +/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h +/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ +pub(crate) fn auxv() -> Result { + // Try to call a dynamically-linked getauxval function. + if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any(target_arch = "aarch64", target_arch = "mips", + target_arch = "mips64"))] + { + if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + drop(hwcap); + } + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv") +} + +/// Tries to read the `key` from the auxiliary vector by calling the +/// dynamically-linked `getauxval` function. If the function is not linked, +/// this function return `Err`. +fn getauxval(key: usize) -> Result { + use libc; + pub type F = unsafe extern "C" fn(usize) -> usize; + unsafe { + let ptr = libc::dlsym( + libc::RTLD_DEFAULT, + "getauxval\0".as_ptr() as *const _, + ); + if ptr.is_null() { + return Err(()); + } + + let ffi_getauxval: F = mem::transmute(ptr); + Ok(ffi_getauxval(key)) + } +} + +/// Tries to read the auxiliary vector from the `file`. If this fails, this +/// function returns `Err`. +fn auxv_from_file(file: &str) -> Result { + let mut file = File::open(file).map_err(|_| ())?; + + // See https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h + // + // The auxiliary vector contains at most 32 (key,value) fields: from + // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of + // 2*32 `usize` elements is enough to read the whole vector. + let mut buf = [0_usize; 64]; + { + let raw: &mut [u8; 64 * mem::size_of::()] = + unsafe { mem::transmute(&mut buf) }; + file.read(raw).map_err(|_| ())?; + } + auxv_from_buf(&buf) +} + +/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this +/// function returns `Err`. +fn auxv_from_buf(buf: &[usize; 64]) -> Result { + // Targets with only AT_HWCAP: + #[cfg(any(target_arch = "aarch64", target_arch = "mips", + target_arch = "mips64"))] + { + for el in buf.chunks(2) { + match el[0] { + AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }), + _ => (), + } + } + } + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + let mut hwcap = None; + let mut hwcap2 = None; + for el in buf.chunks(2) { + match el[0] { + AT_HWCAP => hwcap = Some(el[1]), + AT_HWCAP2 => hwcap2 = Some(el[1]), + _ => (), + } + } + + if let (Some(hwcap), Some(hwcap2)) = (hwcap, hwcap2) { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + drop(buf); + Err(()) +} + +#[cfg(test)] +mod tests { + extern crate auxv as auxv_crate; + use super::*; + + // Reads the Auxiliary Vector key from /proc/self/auxv + // using the auxv crate. 
+ fn auxv_crate_getprocfs(key: usize) -> Option { + use self::auxv_crate::AuxvType; + use self::auxv_crate::procfs::search_procfs_auxv; + let k = key as AuxvType; + match search_procfs_auxv(&[k]) { + Ok(v) => Some(v[&k] as usize), + Err(_) => None, + } + } + + // Reads the Auxiliary Vector key from getauxval() + // using the auxv crate. + #[cfg(not(any(target_arch = "mips", target_arch = "mips64")))] + fn auxv_crate_getauxval(key: usize) -> Option { + use self::auxv_crate::AuxvType; + use self::auxv_crate::getauxval::Getauxval; + let q = auxv_crate::getauxval::NativeGetauxval {}; + match q.getauxval(key as AuxvType) { + Ok(v) => Some(v as usize), + Err(_) => None, + } + } + + // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv + // does not always contain the AT_HWCAP key under qemu. + #[cfg(not(any(target_arch = "mips", target_arch = "mips64", target_arch = "powerpc")))] + #[test] + fn auxv_crate() { + let v = auxv(); + if let Some(hwcap) = auxv_crate_getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Some(hwcap2) = auxv_crate_getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } + } + + #[test] + fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } + } + + cfg_if! { + if #[cfg(target_arch = "arm")] { + #[test] + fn linux_rpi3() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + #[should_panic] + fn linux_macos_vb() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + // this file is incomplete (contains hwcap but not hwcap2), we + // want to fall back to /proc/cpuinfo in this case, so + // reading should fail. assert_eq!(v.hwcap, 126614527); + // assert_eq!(v.hwcap2, 0); + } + } else if #[cfg(target_arch = "aarch64")] { + #[test] + fn linux_x64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-x64-i7-6850k.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 3219913727); + } + } + } + + #[test] + fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } + } + + #[test] + fn auxv_crate_procfs() { + let v = auxv(); + if let Some(hwcap) = auxv_crate_getprocfs(AT_HWCAP) { + assert_eq!(v.unwrap().hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Some(hwcap2) = auxv_crate_getprocfs(AT_HWCAP2) { + assert_eq!(v.unwrap().hwcap2, hwcap2); + } + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs new file mode 100644 index 00000000000..b3168578537 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs @@ -0,0 +1,301 @@ +//! 
Parses /proc/cpuinfo +#![cfg_attr(not(target_arch = "arm"), allow(dead_code))] + +extern crate std; +use self::std::{prelude::v1::*, fs::File, io, io::Read}; + +/// cpuinfo +pub(crate) struct CpuInfo { + raw: String, +} + +impl CpuInfo { + /// Reads /proc/cpuinfo into CpuInfo. + pub(crate) fn new() -> Result { + let mut file = File::open("/proc/cpuinfo")?; + let mut cpui = Self { raw: String::new() }; + file.read_to_string(&mut cpui.raw)?; + Ok(cpui) + } + /// Returns the value of the cpuinfo `field`. + pub(crate) fn field(&self, field: &str) -> CpuInfoField { + for l in self.raw.lines() { + if l.trim().starts_with(field) { + return CpuInfoField::new(l.split(": ").nth(1)); + } + } + CpuInfoField(None) + } + + /// Returns the `raw` contents of `/proc/cpuinfo` + #[cfg(test)] + fn raw(&self) -> &String { + &self.raw + } + + #[cfg(test)] + fn from_str(other: &str) -> Result { + Ok(Self { + raw: String::from(other), + }) + } +} + +/// Field of cpuinfo +#[derive(Debug)] +pub(crate) struct CpuInfoField<'a>(Option<&'a str>); + +impl<'a> PartialEq<&'a str> for CpuInfoField<'a> { + fn eq(&self, other: &&'a str) -> bool { + match self.0 { + None => other.is_empty(), + Some(f) => f == other.trim(), + } + } +} + +impl<'a> CpuInfoField<'a> { + pub(crate) fn new<'b>(v: Option<&'b str>) -> CpuInfoField<'b> { + match v { + None => CpuInfoField::<'b>(None), + Some(f) => CpuInfoField::<'b>(Some(f.trim())), + } + } + /// Does the field exist? + #[cfg(test)] + pub(crate) fn exists(&self) -> bool { + self.0.is_some() + } + /// Does the field contain `other`? + pub(crate) fn has(&self, other: &str) -> bool { + match self.0 { + None => other.is_empty(), + Some(f) => { + let other = other.trim(); + for v in f.split(' ') { + if v == other { + return true; + } + } + false + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn raw_dump() { + let cpuinfo = CpuInfo::new().unwrap(); + if cpuinfo.field("vendor_id") == "GenuineIntel" { + assert!(cpuinfo.field("flags").exists()); + assert!(!cpuinfo.field("vendor33_id").exists()); + assert!(cpuinfo.field("flags").has("sse")); + assert!(!cpuinfo.field("flags").has("avx314")); + } + println!("{}", cpuinfo.raw()); + } + + const CORE_DUO_T6500: &str = r"processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 23 +model name : Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz +stepping : 10 +microcode : 0xa0b +cpu MHz : 1600.000 +cache size : 2048 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 2 +apicid : 0 +initial apicid : 0 +fdiv_bug : no +hlt_bug : no +f00f_bug : no +coma_bug : no +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm +bogomips : 4190.43 +clflush size : 64 +cache_alignment : 64 +address sizes : 36 bits physical, 48 bits virtual +power management: +"; + + #[test] + fn core_duo_t6500() { + let cpuinfo = CpuInfo::from_str(CORE_DUO_T6500).unwrap(); + assert_eq!(cpuinfo.field("vendor_id"), "GenuineIntel"); + assert_eq!(cpuinfo.field("cpu family"), "6"); + assert_eq!(cpuinfo.field("model"), "23"); + assert_eq!( + cpuinfo.field("model name"), + "Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz" + ); + assert_eq!( + cpuinfo.field("flags"), + "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm 
constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm" + ); + assert!(cpuinfo.field("flags").has("fpu")); + assert!(cpuinfo.field("flags").has("dtherm")); + assert!(cpuinfo.field("flags").has("sse2")); + assert!(!cpuinfo.field("flags").has("avx")); + } + + const ARM_CORTEX_A53: &str = + r"Processor : AArch64 Processor rev 3 (aarch64) + processor : 0 + processor : 1 + processor : 2 + processor : 3 + processor : 4 + processor : 5 + processor : 6 + processor : 7 + Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 + CPU implementer : 0x41 + CPU architecture: AArch64 + CPU variant : 0x0 + CPU part : 0xd03 + CPU revision : 3 + + Hardware : HiKey Development Board + "; + + #[test] + fn arm_cortex_a53() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A53).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "AArch64 Processor rev 3 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd evtstrm aes pmull sha1 sha2 crc32" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(!cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } + + const ARM_CORTEX_A57: &str = r"Processor : Cortex A57 Processor rev 1 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +Features : fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd07 +CPU revision : 1"; + + #[test] + fn arm_cortex_a57() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A57).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "Cortex A57 Processor rev 1 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } + + const POWER8E_POWERKVM: &str = r"processor : 0 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 1 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 2 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 3 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +timebase : 512000000 +platform : pSeries +model : IBM pSeries (emulated by qemu) +machine : CHRP IBM pSeries (emulated by qemu)"; + + #[test] + fn power8_powerkvm() { + let cpuinfo = CpuInfo::from_str(POWER8E_POWERKVM).unwrap(); + assert_eq!(cpuinfo.field("cpu"), "POWER8E (raw), altivec supported"); + + assert!(cpuinfo.field("cpu").has("altivec")); + } + + const POWER5P: &str = r"processor : 0 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 1 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 2 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 3 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 4 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 5 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 6 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 
0201) + +processor : 7 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +timebase : 237331000 +platform : pSeries +machine : CHRP IBM,9133-55A"; + + #[test] + fn power5p() { + let cpuinfo = CpuInfo::from_str(POWER5P).unwrap(); + assert_eq!(cpuinfo.field("cpu"), "POWER5+ (gs)"); + + assert!(!cpuinfo.field("cpu").has("altivec")); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 00000000000..7c180326feb --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,31 @@ +//! Run-time feature detection for MIPS on Linux. + +use crate::detect::{Feature, cache, bit}; +use super::auxvec; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + // TODO: fall back via cpuinfo + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 00000000000..642dfb46571 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,26 @@ +//! Run-time feature detection on Linux + +mod auxvec; +mod cpuinfo; + +cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub use self::aarch64::check_for; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub use self::arm::check_for; + } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] { + mod mips; + pub use self::mips::check_for; + } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] { + mod powerpc; + pub use self::powerpc::check_for; + } else { + use crate::detect::Feature; + /// Performs run-time feature detection. + pub fn check_for(_x: Feature) -> bool { + false + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 00000000000..0022a7db983 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,41 @@ +//! Run-time feature detection for PowerPC on Linux. + +use crate::detect::{Feature, cache}; +use super::{auxvec, cpuinfo}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. 
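// Editorial sketch, not part of this patch: the /proc/cpuinfo fallback used by
// the Linux detectors boils down to querying the `CpuInfo` parser from
// cpuinfo.rs above. `has_cpuinfo_flag` is an illustrative helper, not an API
// of this crate; on ARM one would call it as
// `has_cpuinfo_flag("Features", "neon")`, and the PowerPC code below does the
// equivalent with the "cpu" field and "altivec".
fn has_cpuinfo_flag(field: &str, flag: &str) -> bool {
    match cpuinfo::CpuInfo::new() {
        Ok(c) => c.field(field).has(flag),
        Err(_) => false,
    }
}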
+fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap & 0x80000000 != 0); + return value; + } + + // PowerPC's /proc/cpuinfo lacks a proper Feature field, + // but `altivec` support is indicated in the `cpu` field. + if let Ok(c) = cpuinfo::CpuInfo::new() { + enable_feature(&mut value, Feature::altivec, c.field("cpu").has("altivec")); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/other.rs b/library/stdarch/crates/std_detect/src/detect/os/other.rs new file mode 100644 index 00000000000..23e399ea790 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/other.rs @@ -0,0 +1,9 @@ +//! Other operating systems + +use crate::detect::Feature; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(_x: Feature) -> bool { + false +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs new file mode 100644 index 00000000000..9237d5dc0a5 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -0,0 +1,357 @@ +//! x86 run-time feature detection is OS independent. + +use core::{prelude::v1::*, mem}; +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use crate::detect::{Feature, cache, bit}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Run-time feature detection on x86 works by using the CPUID instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains +/// all the information about which flags to set to query which values, and in +/// which registers these are reported. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. +/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + // If the x86 CPU does not support the CPUID instruction then it is too + // old to support any of the currently-detectable features. + if !has_cpuid() { + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. 
EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_basic_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ + mem::transmute(ebx), + mem::transmute(edx), + mem::transmute(ecx), + ]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7, ECX = 0: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7 + { + let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + (ebx, ecx) + } else { + (0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_basic_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + if bit::test(r as usize, rb) { + value.set(f as u32); + } + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ebx, 3, Feature::bmi); + enable(extended_features_ebx, 8, Feature::bmi2); + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. 
+ // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`: + let os_avx512_support = xcr0 & 224 == 224; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. + } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + if os_avx512_support { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512_ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512_vbmi); + enable( + extended_features_ecx, + 14, + Feature::avx512_vpopcntdq, + ); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. + // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. 
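// Editorial sketch, not part of this patch: the feature bits recorded by this
// routine are ultimately consumed through `is_x86_feature_detected!`. The
// usual pattern pairs that macro with a `#[target_feature]` function plus a
// scalar fallback; `sum_avx2`/`sum` are hypothetical examples, not items
// defined in this crate.
#[target_feature(enable = "avx2")]
unsafe fn sum_avx2(xs: &[u32]) -> u32 {
    // The compiler is free to vectorize this body with AVX2 instructions.
    xs.iter().sum()
}

fn sum(xs: &[u32]) -> u32 {
    if is_x86_feature_detected!("avx2") {
        // Sound: we just verified at run time that AVX2 is available.
        unsafe { sum_avx2(xs) }
    } else {
        xs.iter().sum()
    }
}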
+ enable(extended_proc_info_ecx, 5, Feature::abm); + if vendor_id == *b"AuthenticAMD" { + // These features are only available on AMD CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + } + } + + value +} + +#[cfg(test)] +mod tests { + extern crate cupid; + + #[test] + fn dump() { + println!("aes: {:?}", is_x86_feature_detected!("aes")); + println!("pclmulqdq: {:?}", is_x86_feature_detected!("pclmulqdq")); + println!("rdrand: {:?}", is_x86_feature_detected!("rdrand")); + println!("rdseed: {:?}", is_x86_feature_detected!("rdseed")); + println!("tsc: {:?}", is_x86_feature_detected!("tsc")); + println!("sse: {:?}", is_x86_feature_detected!("sse")); + println!("sse2: {:?}", is_x86_feature_detected!("sse2")); + println!("sse3: {:?}", is_x86_feature_detected!("sse3")); + println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); + println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); + println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); + println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("avx: {:?}", is_x86_feature_detected!("avx")); + println!("avx2: {:?}", is_x86_feature_detected!("avx2")); + println!("avx512f {:?}", is_x86_feature_detected!("avx512f")); + println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd")); + println!("avx512er {:?}", is_x86_feature_detected!("avx512er")); + println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf")); + println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw")); + println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq")); + println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl")); + println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma")); + println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi")); + println!( + "avx512_vpopcntdq {:?}", + is_x86_feature_detected!("avx512vpopcntdq") + ); + println!("fma: {:?}", is_x86_feature_detected!("fma")); + println!("abm: {:?}", is_x86_feature_detected!("abm")); + println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); + println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); + println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); + println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); + println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); + println!("xsave: {:?}", is_x86_feature_detected!("xsave")); + println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); + println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); + println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); + println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); + println!("adx: {:?}", is_x86_feature_detected!("adx")); + } + + #[test] + fn compare_with_cupid() { + let information = cupid::master().unwrap(); + assert_eq!(is_x86_feature_detected!("aes"), information.aesni()); + assert_eq!(is_x86_feature_detected!("pclmulqdq"), information.pclmulqdq()); + assert_eq!(is_x86_feature_detected!("rdrand"), information.rdrand()); + assert_eq!(is_x86_feature_detected!("rdseed"), information.rdseed()); + assert_eq!(is_x86_feature_detected!("tsc"), information.tsc()); + assert_eq!(is_x86_feature_detected!("sse"), information.sse()); + assert_eq!(is_x86_feature_detected!("sse2"), information.sse2()); + assert_eq!(is_x86_feature_detected!("sse3"), information.sse3()); + assert_eq!(is_x86_feature_detected!("ssse3"), information.ssse3()); + 
assert_eq!(is_x86_feature_detected!("sse4.1"), information.sse4_1()); + assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2()); + assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a()); + assert_eq!(is_x86_feature_detected!("sha"), information.sha()); + assert_eq!(is_x86_feature_detected!("avx"), information.avx()); + assert_eq!(is_x86_feature_detected!("avx2"), information.avx2()); + assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f()); + assert_eq!(is_x86_feature_detected!("avx512cd"), information.avx512cd()); + assert_eq!(is_x86_feature_detected!("avx512er"), information.avx512er()); + assert_eq!(is_x86_feature_detected!("avx512pf"), information.avx512pf()); + assert_eq!(is_x86_feature_detected!("avx512bw"), information.avx512bw()); + assert_eq!(is_x86_feature_detected!("avx512dq"), information.avx512dq()); + assert_eq!(is_x86_feature_detected!("avx512vl"), information.avx512vl()); + assert_eq!( + is_x86_feature_detected!("avx512ifma"), + information.avx512_ifma() + ); + assert_eq!( + is_x86_feature_detected!("avx512vbmi"), + information.avx512_vbmi() + ); + assert_eq!( + is_x86_feature_detected!("avx512vpopcntdq"), + information.avx512_vpopcntdq() + ); + assert_eq!(is_x86_feature_detected!("fma"), information.fma()); + assert_eq!(is_x86_feature_detected!("bmi1"), information.bmi1()); + assert_eq!(is_x86_feature_detected!("bmi2"), information.bmi2()); + assert_eq!(is_x86_feature_detected!("popcnt"), information.popcnt()); + assert_eq!(is_x86_feature_detected!("abm"), information.lzcnt()); + assert_eq!(is_x86_feature_detected!("tbm"), information.tbm()); + assert_eq!(is_x86_feature_detected!("lzcnt"), information.lzcnt()); + assert_eq!(is_x86_feature_detected!("xsave"), information.xsave()); + assert_eq!(is_x86_feature_detected!("xsaveopt"), information.xsaveopt()); + assert_eq!( + is_x86_feature_detected!("xsavec"), + information.xsavec_and_xrstor() + ); + assert_eq!( + is_x86_feature_detected!("xsaves"), + information.xsaves_xrstors_and_ia32_xss() + ); + assert_eq!( + is_x86_feature_detected!("cmpxchg16b"), + information.cmpxchg16b(), + ); + assert_eq!( + is_x86_feature_detected!("adx"), + information.adx(), + ); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv new file mode 100644 index 00000000000..0538e661f63 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv new file mode 100644 index 00000000000..6afe1b3b46a Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv new file mode 100644 index 00000000000..75abc02d178 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv differ diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs new file mode 100644 index 00000000000..af7fc3bdc42 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -0,0 +1,37 @@ +//! Run-time feature detection for the Rust standard library. +//! 
+//! To detect whether a feature is enabled in the system running the binary +//! use one of the appropriate macro for the target: +//! +//! * `x86` and `x86_64`: [`is_x86_feature_detected`] +//! * `arm`: [`is_arm_feature_detected`] +//! * `aarch64`: [`is_aarch64_feature_detected`] +//! * `mips`: [`is_mips_feature_detected`] +//! * `mips64`: [`is_mips64_feature_detected`] +//! * `powerpc`: [`is_powerpc_feature_detected`] +//! * `powerpc64`: [`is_powerpc64_feature_detected`] + +#![unstable(feature = "stdsimd", issue = "27731")] +#![feature(const_fn, integer_atomics, staged_api, stdsimd)] +#![feature(doc_cfg, allow_internal_unstable)] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::shadow_reuse))] +#![cfg_attr( + feature = "cargo-clippy", + deny(clippy::missing_inline_in_public_items,) +)] +#![cfg_attr(target_os = "linux", feature(linkage))] +#![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] +#![no_std] + +#[cfg(test)] +#[macro_use(println)] +extern crate std; + +extern crate libc; + +#[macro_use] +extern crate cfg_if; + +#[doc(hidden)] +#[unstable(feature = "stdsimd", issue = "27731")] +pub mod detect; diff --git a/library/stdarch/crates/std_detect/src/mod.rs b/library/stdarch/crates/std_detect/src/mod.rs new file mode 100644 index 00000000000..b630e7ff383 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/mod.rs @@ -0,0 +1,5 @@ +//! `std_detect` + +#[doc(hidden)] // unstable implementation detail +#[unstable(feature = "stdsimd", issue = "27731")] +pub mod detect; -- cgit 1.4.1-3-g733a5 From 5f7006df5a945332e4f2a20a88b3ded8ce5c44fa Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 22 Jan 2019 18:48:36 +0100 Subject: Fix clippy issues --- library/stdarch/.travis.yml | 6 ++---- library/stdarch/crates/assert-instr-macro/src/lib.rs | 2 +- library/stdarch/crates/core_arch/src/simd.rs | 8 ++++---- library/stdarch/crates/core_arch/src/x86/adx.rs | 6 +++--- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 8 ++++++-- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 4 ++-- library/stdarch/crates/core_arch/src/x86/xsave.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/adx.rs | 6 +++--- library/stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/rdrand.rs | 3 ++- library/stdarch/crates/core_arch/src/x86_64/xsave.rs | 2 +- library/stdarch/crates/std_detect/src/detect/cache.rs | 6 +++++- library/stdarch/examples/hex.rs | 1 + 14 files changed, 33 insertions(+), 25 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/.travis.yml b/library/stdarch/.travis.yml index df49db3791f..dfb4d37db00 100644 --- a/library/stdarch/.travis.yml +++ b/library/stdarch/.travis.yml @@ -89,7 +89,7 @@ matrix: - name: "clippy" install: true script: | - if rustup component add clippy-preview; then + if rustup component add clippy-preview ; then cargo clippy --all -- -D clippy::pedantic fi - name: "Shellcheck" @@ -97,9 +97,7 @@ matrix: script: - shellcheck --version - shellcheck ci/*.sh - allow_failures: - - name: "clippy" - + install: travis_retry rustup target add $TARGET script: - cargo generate-lockfile diff --git a/library/stdarch/crates/assert-instr-macro/src/lib.rs b/library/stdarch/crates/assert-instr-macro/src/lib.rs index eba00e6b2b4..3a6e0dfb9ce 100644 --- a/library/stdarch/crates/assert-instr-macro/src/lib.rs +++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs @@ -173,7 +173,7 @@ impl syn::parse::Parse for Invoc 
{ println!("{:?}", input.cursor().token_stream()); return Err(input.error("expected an instruction")); } - if instr.len() == 0 { + if instr.is_empty() { return Err(input.error("expected an instruction before comma")); } let mut args = Vec::new(); diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 468b1e380bd..237a5735739 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -12,12 +12,12 @@ macro_rules! simd_ty { impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { - $id($($elem_name),*) + Self($($elem_name),*) } #[inline] pub(crate) const fn splat(value: $ety) -> Self { - $id($({ + Self($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value @@ -48,12 +48,12 @@ macro_rules! simd_m_ty { #[inline] pub(crate) const fn new($($elem_name: bool),*) -> Self { - $id($(Self::bool_to_internal($elem_name)),*) + Self($(Self::bool_to_internal($elem_name)),*) } #[inline] pub(crate) const fn splat(value: bool) -> Self { - $id($({ + Self($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; Self::bool_to_internal(value) diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index c59743980f2..f130e7a41da 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -9,7 +9,7 @@ extern "unadjusted" { fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32); } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry flag), and store the unsigned 32-bit result in out, and the carry-out /// is returned (carry or overflow flag). #[inline] @@ -21,7 +21,7 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { a } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 32-bit result in out, and /// the carry-out is returned (carry or overflow flag). #[inline] @@ -33,7 +33,7 @@ pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { _addcarry_u32(c_in, a, b, out) } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 32-bit result in out, and /// the carry-out is returned (carry or overflow flag). #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index adf3e127d64..d796995ad38 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -1,13 +1,17 @@ //! `cpuid` intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; /// Result of the `cpuid` instruction. +#[cfg_attr( + feature = "cargo-clippy", + // the derived impl of Debug for CpuidResult is not #[inline] and that's OK. 
+ allow(clippy::missing_inline_in_public_items) +)] #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub struct CpuidResult { /// EAX register. diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index 63573f689d6..90bb9454b0a 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -2,6 +2,8 @@ //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] + #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.rdrand.16"] @@ -24,7 +26,6 @@ use stdsimd_test::assert_instr; #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { let (v, flag) = x86_rdrand16_step(); @@ -39,7 +40,6 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { let (v, flag) = x86_rdrand32_step(); diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index c52dcd8c2a0..4c7f5338a96 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -1,6 +1,6 @@ //! `i586`'s `xsave` and `xsaveopt` target feature intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs index 0343351b916..38a90047e40 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -9,7 +9,7 @@ extern "unadjusted" { fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64); } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry flag), and store the unsigned 64-bit result in out, and the carry-out /// is returned (carry or overflow flag). #[inline] @@ -21,7 +21,7 @@ pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { a } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 64-bit result in out, and /// the carry-out is returned (carry or overflow flag). #[inline] @@ -33,7 +33,7 @@ pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { _addcarry_u64(c_in, a, b, out) } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 64-bit result in out, and /// the carry-out is returned (carry or overflow flag). 
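// Editorial sketch, not part of this patch: the carry-in/carry-out shape of
// `_addcarry_u64` is what lets wide additions be chained limb by limb. A
// 128-bit addition over two 64-bit limbs looks like this (the function name
// and the (low, high, carry) return layout are illustrative only):
unsafe fn add_u128(a_lo: u64, a_hi: u64, b_lo: u64, b_hi: u64) -> (u64, u64, u8) {
    use core::arch::x86_64::_addcarry_u64;
    let mut lo = 0;
    let mut hi = 0;
    // Add the low limbs with no carry in, then feed the carry into the high limbs.
    let carry = _addcarry_u64(0, a_lo, b_lo, &mut lo);
    let carry = _addcarry_u64(carry, a_hi, b_hi, &mut hi);
    (lo, hi, carry)
}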
#[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index 75bb33c956c..ba121d67006 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -1,6 +1,6 @@ //! Byte swap intrinsics. -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index c7b43a4469b..822bfc2fb3d 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -5,7 +5,7 @@ use stdsimd_test::assert_instr; /// Compare and exchange 16 bytes (128 bits) of data atomically. /// -/// This intrinsic corresponds to the `cmpxchg16b` instruction on x86_64 +/// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64` /// processors. It performs an atomic compare-and-swap, updating the `ptr` /// memory location to `val` if the current value in memory equals `old`. /// diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 7cc0d710c81..aef4f638ebe 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -2,6 +2,8 @@ //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] + #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.rdrand.64"] @@ -20,7 +22,6 @@ use stdsimd_test::assert_instr; #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { let (v, flag) = x86_rdrand64_step(); diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 875b677dbd2..7531ac5832e 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -1,6 +1,6 @@ //! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index c2de4da7349..a0d009428c8 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -32,7 +32,7 @@ pub(crate) struct Initializer(u64); impl Default for Initializer { fn default() -> Self { - Initializer(0) + Self(0) } } @@ -77,6 +77,10 @@ struct Cache(AtomicU64); #[cfg(target_pointer_width = "64")] impl Cache { /// Creates an uninitialized cache. 
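// Editorial sketch, not part of this patch: a minimal standalone version of
// the memoization pattern behind `Cache` — detect once, stash the bitset in an
// atomic, and use an otherwise-impossible value as the "uninitialized"
// sentinel. The real cache also caps the number of features at
// `CACHE_CAPACITY` (63) and does more bookkeeping; this shows only the core idea.
use core::sync::atomic::{AtomicU64, Ordering};

static FEATURES: AtomicU64 = AtomicU64::new(u64::max_value());

fn feature_is_set(bit: u32, detect: fn() -> u64) -> bool {
    let mut bits = FEATURES.load(Ordering::Relaxed);
    if bits == u64::max_value() {
        // First call (or a harmless re-run if two threads race): compute and cache.
        bits = detect();
        FEATURES.store(bits, Ordering::Relaxed);
    }
    bits & (1 << bit) != 0
}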
+ #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::declare_interior_mutable_const) + )] const fn uninitialized() -> Self { const X: AtomicU64 = AtomicU64::new(u64::max_value()); Self(X) diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs index 37f2ce70160..9e6c84c11d2 100644 --- a/library/stdarch/examples/hex.rs +++ b/library/stdarch/examples/hex.rs @@ -22,6 +22,7 @@ clippy::option_unwrap_used, clippy::shadow_reuse, clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, clippy::cast_sign_loss, clippy::missing_docs_in_private_items ) -- cgit 1.4.1-3-g733a5 From af7134fcf1e9eb4dbc380d8e1425de0af0ee12c3 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 22 Jan 2019 19:50:20 +0100 Subject: Do not use Self constructors --- library/stdarch/crates/core_arch/src/simd.rs | 10 ++++++---- library/stdarch/crates/std_detect/src/detect/cache.rs | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 237a5735739..5c8425623d6 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -9,15 +9,16 @@ macro_rules! simd_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); + #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { - Self($($elem_name),*) + $id($($elem_name),*) } #[inline] pub(crate) const fn splat(value: $ety) -> Self { - Self($({ + $id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value @@ -40,6 +41,7 @@ macro_rules! simd_m_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); + #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl $id { #[inline] const fn bool_to_internal(x: bool) -> $ety { @@ -48,12 +50,12 @@ macro_rules! simd_m_ty { #[inline] pub(crate) const fn new($($elem_name: bool),*) -> Self { - Self($(Self::bool_to_internal($elem_name)),*) + $id($(Self::bool_to_internal($elem_name)),*) } #[inline] pub(crate) const fn splat(value: bool) -> Self { - Self($({ + $id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; Self::bool_to_internal(value) diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index a0d009428c8..944dccbe6df 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -30,9 +30,10 @@ const CACHE_CAPACITY: u32 = 63; #[derive(Copy, Clone)] pub(crate) struct Initializer(u64); +#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl Default for Initializer { fn default() -> Self { - Self(0) + Initializer(0) } } @@ -75,6 +76,7 @@ static CACHE: Cache = Cache::uninitialized(); struct Cache(AtomicU64); #[cfg(target_pointer_width = "64")] +#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl Cache { /// Creates an uninitialized cache. #[cfg_attr( @@ -83,7 +85,7 @@ impl Cache { )] const fn uninitialized() -> Self { const X: AtomicU64 = AtomicU64::new(u64::max_value()); - Self(X) + Cache(X) } /// Is the cache uninitialized? 
#[inline] -- cgit 1.4.1-3-g733a5 From e51ee17aa79ec4ccb2901ba128819bfccd5af651 Mon Sep 17 00:00:00 2001 From: Juan Aguilar Santillana Date: Sun, 3 Feb 2019 20:20:08 +0100 Subject: Add detect macros should support trailing commas (Fix #443) --- .../crates/std_detect/src/detect/arch/aarch64.rs | 3 ++ .../crates/std_detect/src/detect/arch/arm.rs | 3 ++ .../crates/std_detect/src/detect/arch/mips.rs | 3 ++ .../crates/std_detect/src/detect/arch/mips64.rs | 3 ++ .../crates/std_detect/src/detect/arch/powerpc.rs | 3 ++ .../crates/std_detect/src/detect/arch/powerpc64.rs | 3 ++ .../crates/std_detect/src/detect/arch/x86.rs | 3 ++ .../std_detect/tests/macro_trailing_commas.rs | 55 ++++++++++++++++++++++ 8 files changed, 76 insertions(+) create mode 100644 library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs index 882c22cc174..7571dfbfec3 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -66,6 +66,9 @@ macro_rules! is_aarch64_feature_detected { ("v8.3a") => { compile_error!("\"v8.3a\" feature cannot be detected at run-time") }; + ($t:tt,) => { + is_aarch64_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown aarch64 target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs index cb6ac6badcc..5f91c9269ad 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -17,6 +17,9 @@ macro_rules! is_arm_feature_detected { ("vfp2") => { compile_error!("\"vfp2\" feature cannot be detected at run-time") }; ("vfp3") => { compile_error!("\"vfp3\" feature cannot be detected at run-time") }; ("vfp4") => { compile_error!("\"vfp4\" feature cannot be detected at run-time") }; + ($t:tt,) => { + is_arm_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs index 876f8dde262..58e3ee6d204 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -9,6 +9,9 @@ macro_rules! is_mips_feature_detected { cfg!(target_feature = "msa") || $crate::detect::check_for($crate::detect::Feature::msa) }; + ($t:tt,) => { + is_mips_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown mips target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs index ab837b3d5c9..aa42c0e55df 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -9,6 +9,9 @@ macro_rules! 
is_mips64_feature_detected { cfg!(target_feature = "msa") || $crate::detect::check_for($crate::detect::Feature::msa) }; + ($t:tt,) => { + is_mips64_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown mips64 target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs index 9c440b1d6b0..8270e5bee46 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -17,6 +17,9 @@ macro_rules! is_powerpc_feature_detected { cfg!(target_feature = "power8") || $crate::detect::check_for($crate::detect::Feature::power8) }; + ($t:tt,) => { + is_powerpc_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown powerpc target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs index 910940f0bb9..2f754713650 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -17,6 +17,9 @@ macro_rules! is_powerpc64_feature_detected { cfg!(target_feature = "power8") || $crate::detect::check_for($crate::detect::Feature::power8) }; + ($t:tt,) => { + is_powerpc64_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown powerpc64 target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 3ef8d31d12b..5425aeecd0c 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -234,6 +234,9 @@ macro_rules! 
is_x86_feature_detected { cfg!(target_feature = "adx") || $crate::detect::check_for( $crate::detect::Feature::adx) }; + ($t:tt,) => { + is_x86_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs new file mode 100644 index 00000000000..d63da6af06a --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -0,0 +1,55 @@ +#![feature(stdsimd)] +#![cfg_attr(stdsimd_strict, deny(warnings))] +#![cfg_attr( + feature = "cargo-clippy", + allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout) +)] + +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" +))] +#[macro_use] +extern crate std_detect; + +#[test] +#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] +fn arm_linux() { + let _ = is_arm_feature_detected!("neon"); + let _ = is_arm_feature_detected!("neon",); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "linux", target_os = "android") +))] +fn aarch64_linux() { + let _ = is_aarch64_feature_detected!("fp"); + let _ = is_aarch64_feature_detected!("fp",); +} + +#[test] +#[cfg(all(target_arch = "powerpc", target_os = "linux"))] +fn powerpc_linux() { + let _ = is_powerpc_feature_detected!("altivec"); + let _ = is_powerpc_feature_detected!("altivec",); +} + +#[test] +#[cfg(all(target_arch = "powerpc64", target_os = "linux"))] +fn powerpc64_linux() { + let _ = is_powerpc64_feature_detected!("altivec"); + let _ = is_powerpc64_feature_detected!("altivec",); +} + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn x86_all() { + let _ = is_x86_feature_detected!("sse"); + let _ = is_x86_feature_detected!("sse",); +} -- cgit 1.4.1-3-g733a5 From eb13680d1aed60d8564f693aabb230ac189524e2 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 5 Feb 2019 08:22:18 +0100 Subject: Remove const workaround in std_detect cache --- library/stdarch/crates/std_detect/src/detect/cache.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index 944dccbe6df..1ab94dd1cd9 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -84,8 +84,7 @@ impl Cache { allow(clippy::declare_interior_mutable_const) )] const fn uninitialized() -> Self { - const X: AtomicU64 = AtomicU64::new(u64::max_value()); - Cache(X) + Cache(AtomicU64::new(u64::max_value())) } /// Is the cache uninitialized? 
#[inline] -- cgit 1.4.1-3-g733a5 From ff129bff0534fe7cb69125ac5d6a8b6aeb5a6a60 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 7 Feb 2019 22:56:23 +0100 Subject: Add cargo features to disable usage of file I/O and dlsym in std_detect --- library/stdarch/ci/run.sh | 27 ++++++--- library/stdarch/crates/std_detect/Cargo.toml | 9 ++- library/stdarch/crates/std_detect/README.md | 16 +++++ .../stdarch/crates/std_detect/src/detect/cache.rs | 6 +- .../std_detect/src/detect/os/linux/auxvec.rs | 69 +++++++++++++++++----- .../crates/std_detect/src/detect/os/linux/mod.rs | 2 + .../stdarch/crates/std_detect/src/detect/os/x86.rs | 7 ++- library/stdarch/crates/std_detect/src/lib.rs | 29 ++++++--- 8 files changed, 125 insertions(+), 40 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/ci/run.sh b/library/stdarch/ci/run.sh index 2d33060f801..db08da4645f 100755 --- a/library/stdarch/ci/run.sh +++ b/library/stdarch/ci/run.sh @@ -58,12 +58,6 @@ cargo_test() { fi fi cmd="$cmd ${subcmd} --target=$TARGET $1" - if [ "$NOSTD" = "1" ] - then - cmd="$cmd -p core_arch" - else - cmd="$cmd -p core_arch -p std_detect -p stdsimd_examples" - fi cmd="$cmd -- $2" # Un-commenting this disables the test output and shows only a summary: #if [ "$NORUN" != "1" ] @@ -88,8 +82,25 @@ cargo_output() { } cargo_setup -cargo_test -cargo_test "--release" + +CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml" +STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml" +STDSIMD_EXAMPLES="--manifest-path=examples/Cargo.toml" +cargo_test "${CORE_ARCH}" +cargo_test "${CORE_ARCH} --release" +if [ "$NOSTD" != "1" ]; then + cargo_test "${STD_DETECT}" + cargo_test "${STD_DETECT} --release" + + cargo_test "${STD_DETECT} --no-default-features" + cargo_test "${STD_DETECT} --no-default-features --features=std_detect_file_io" + cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval" + cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io" + + cargo_test "${STDSIMD_EXAMPLES}" + cargo_test "${STDSIMD_EXAMPLES} --release" +fi + cargo_output # Test targets compiled with extra features. diff --git a/library/stdarch/crates/std_detect/Cargo.toml b/library/stdarch/crates/std_detect/Cargo.toml index 47ec3e99708..eee52fb5135 100644 --- a/library/stdarch/crates/std_detect/Cargo.toml +++ b/library/stdarch/crates/std_detect/Cargo.toml @@ -23,9 +23,14 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" } maintenance = { status = "experimental" } [dependencies] -libc = "0.2" +libc = { version = "0.2", optional = true, default-features = false } cfg-if = "0.1" [dev-dependencies] auxv = "0.3.3" -cupid = "0.6.0" \ No newline at end of file +cupid = "0.6.0" + +[features] +default = [ "std_detect_dlsym_getauxval", "std_detect_file_io" ] +std_detect_file_io = [] +std_detect_dlsym_getauxval = [ "libc" ] \ No newline at end of file diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md index 6a8143cb20b..4d2ec7d3448 100644 --- a/library/stdarch/crates/std_detect/README.md +++ b/library/stdarch/crates/std_detect/README.md @@ -24,6 +24,22 @@ run-time feature detection support than the one offered by Rust's standard library. We intend to make `std_detect` more flexible and configurable in this regard to better serve the needs of `#[no_std]` targets. 
+# Features + +* `std_detect_dlsym_getauxval` (enabled by default, requires `libc`): Enable to +use `libc::dlsym` to query whether [`getauxval`] is linked into the binary. When +this is not the case, this feature allows other fallback methods to perform +run-time feature detection. When this feature is disabled, `std_detect` assumes +that [`getauxval`] is linked to the binary. If that is not the case the behavior +is undefined. + +* `std_detect_file_io` (enabled by default, requires `std`): Enable to perform run-time feature +detection using file APIs (e.g. `/proc/cpuinfo`, etc.) if other more performant +methods fail. This feature requires `libstd` as a dependency, preventing the +crate from working on applications in which `std` is not available. + +[`getauxval`]: http://man7.org/linux/man-pages/man3/getauxval.3.html + # Platform support * All `x86`/`x86_64` targets are supported on all platforms by querying the diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index 1ab94dd1cd9..ee1914875cf 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -3,13 +3,13 @@ #![allow(dead_code)] // not used on all platforms -use core::sync::atomic::Ordering; +use sync::atomic::Ordering; #[cfg(target_pointer_width = "64")] -use core::sync::atomic::AtomicU64; +use sync::atomic::AtomicU64; #[cfg(target_pointer_width = "32")] -use core::sync::atomic::AtomicU32; +use sync::atomic::AtomicU32; /// Sets the `bit` of `x`. #[inline] diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index 31c980fd382..6ccdbbc88a8 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -1,10 +1,8 @@ //! Parses ELF auxiliary vectors. #![cfg_attr(not(target_arch = "aarch64"), allow(dead_code))] -extern crate std; -use self::std::{prelude::v1::*, fs::File, io::Read}; - -use core::mem; +#[cfg(feature = "std_detect_file_io")] +use ::{fs::File, io::Read}; /// Key to access the CPU Hardware capabilities bitfield. pub(crate) const AT_HWCAP: usize = 16; @@ -34,8 +32,12 @@ pub(crate) struct AuxVec { /// /// There is no perfect way of reading the auxiliary vector. /// -/// - If the `getauxval` is dynamically linked to this binary, it will be used. -/// - Otherwise, try to read `/proc/self/auxv`. +/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use +/// `getauxval` if its linked to the binary, and otherwise proceed to a fallback implementation. +/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is +/// linked to the binary - if that is not the case the behavior is undefined. +/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will +/// try to read `/proc/self/auxv`. /// - If that fails, this function returns an error. /// /// Note that run-time feature detection is not invoked for features that can @@ -49,8 +51,42 @@ pub(crate) struct AuxVec { /// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h /// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ pub(crate) fn auxv() -> Result { - // Try to call a dynamically-linked getauxval function. - if let Ok(hwcap) = getauxval(AT_HWCAP) { + #[cfg(feature = "std_detect_dlsym_getauxval")] { + // Try to call a dynamically-linked getauxval function. 
+ if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any(target_arch = "aarch64", target_arch = "mips", + target_arch = "mips64"))] + { + if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + drop(hwcap); + } + #[cfg(feature = "std_detect_file_io")] { + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv") + } + #[cfg(not(feature = "std_detect_file_io"))] { + Err(()) + } + } + + #[cfg(not(feature = "std_detect_dlsym_getauxval"))] { + let hwcap = unsafe { ffi_getauxval(AT_HWCAP) }; + // Targets with only AT_HWCAP: #[cfg(any(target_arch = "aarch64", target_arch = "mips", target_arch = "mips64"))] @@ -63,22 +99,18 @@ pub(crate) fn auxv() -> Result { // Targets with AT_HWCAP and AT_HWCAP2: #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] { - if let Ok(hwcap2) = getauxval(AT_HWCAP2) { - if hwcap != 0 && hwcap2 != 0 { - return Ok(AuxVec { hwcap, hwcap2 }); - } + let hwcap2 = unsafe { ffi_getauxval(AT_HWCAP2) }; + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); } } - drop(hwcap); } - // If calling getauxval fails, try to read the auxiliary vector from - // its file: - auxv_from_file("/proc/self/auxv") } /// Tries to read the `key` from the auxiliary vector by calling the /// dynamically-linked `getauxval` function. If the function is not linked, /// this function return `Err`. +#[cfg(feature = "std_detect_dlsym_getauxval")] fn getauxval(key: usize) -> Result { use libc; pub type F = unsafe extern "C" fn(usize) -> usize; @@ -98,6 +130,7 @@ fn getauxval(key: usize) -> Result { /// Tries to read the auxiliary vector from the `file`. If this fails, this /// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] fn auxv_from_file(file: &str) -> Result { let mut file = File::open(file).map_err(|_| ())?; @@ -117,6 +150,7 @@ fn auxv_from_file(file: &str) -> Result { /// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this /// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] fn auxv_from_buf(buf: &[usize; 64]) -> Result { // Targets with only AT_HWCAP: #[cfg(any(target_arch = "aarch64", target_arch = "mips", @@ -157,6 +191,7 @@ mod tests { // Reads the Auxiliary Vector key from /proc/self/auxv // using the auxv crate. + #[cfg(feature = "std_detect_file_io")] fn auxv_crate_getprocfs(key: usize) -> Option { use self::auxv_crate::AuxvType; use self::auxv_crate::procfs::search_procfs_auxv; @@ -210,6 +245,7 @@ mod tests { } } + #[cfg(feature = "std_detect_file_io")] cfg_if! { if #[cfg(target_arch = "arm")] { #[test] @@ -244,6 +280,7 @@ mod tests { } #[test] + #[cfg(feature = "std_detect_file_io")] fn auxv_dump_procfs() { if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { println!("{:?}", auxvec); diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs index 642dfb46571..e02d5e6dcda 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -1,6 +1,8 @@ //! Run-time feature detection on Linux mod auxvec; + +#[cfg(feature = "std_detect_file_io")] mod cpuinfo; cfg_if! 
{ diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 9237d5dc0a5..30199b1f44f 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -1,10 +1,11 @@ //! x86 run-time feature detection is OS independent. -use core::{prelude::v1::*, mem}; #[cfg(target_arch = "x86")] -use core::arch::x86::*; +use arch::x86::*; #[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; +use arch::x86_64::*; + +use mem; use crate::detect::{Feature, cache, bit}; diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index af7fc3bdc42..f5278f964a9 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -12,8 +12,7 @@ //! * `powerpc64`: [`is_powerpc64_feature_detected`] #![unstable(feature = "stdsimd", issue = "27731")] -#![feature(const_fn, integer_atomics, staged_api, stdsimd)] -#![feature(doc_cfg, allow_internal_unstable)] +#![feature(const_fn, staged_api, stdsimd, doc_cfg, allow_internal_unstable)] #![cfg_attr(feature = "cargo-clippy", allow(clippy::shadow_reuse))] #![cfg_attr( feature = "cargo-clippy", @@ -23,15 +22,29 @@ #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] #![no_std] -#[cfg(test)] -#[macro_use(println)] -extern crate std; - -extern crate libc; - #[macro_use] extern crate cfg_if; +cfg_if! { + if #[cfg(feature = "std_detect_file_io")] { + #[cfg_attr(test, macro_use(println))] + extern crate std; + + #[allow(unused_imports)] + use std::{arch, fs, io, mem, sync}; + } else { + #[cfg(test)] + #[macro_use(println)] + extern crate std; + + #[allow(unused_imports)] + use core::{arch, mem, sync}; + } +} + +#[cfg(feature = "std_detect_dlsym_getauxval")] +extern crate libc; + #[doc(hidden)] #[unstable(feature = "stdsimd", issue = "27731")] pub mod detect; -- cgit 1.4.1-3-g733a5 From fe06593c6fcbacc92e99fcd94a3b8aba5a2ffb37 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 13 Feb 2019 16:06:45 +0100 Subject: allow_internal_unstable requires feature names Closes #681 . --- library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/arm.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs index 7571dfbfec3..3270641eb34 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -3,7 +3,7 @@ /// Checks if `aarch64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! 
is_aarch64_feature_detected { ("neon") => { // FIXME: this should be removed once we rename Aarch64 neon to asimd diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs index 5f91c9269ad..ebed1f757e3 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -3,7 +3,7 @@ /// Checks if `arm` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_arm_feature_detected { ("neon") => { cfg!(target_feature = "neon") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs index 58e3ee6d204..969d6d05dc5 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -3,7 +3,7 @@ /// Checks if `mips` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_mips_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs index aa42c0e55df..d421595c9cd 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -3,7 +3,7 @@ /// Checks if `mips64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_mips64_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs index 8270e5bee46..9d65437e0ba 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_powerpc_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs index 2f754713650..4514100907c 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! 
is_powerpc64_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 5425aeecd0c..953bf29d680 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -79,7 +79,7 @@ /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] #[stable(feature = "simd_x86", since = "1.27.0")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_x86_feature_detected { ("aes") => { cfg!(target_feature = "aes") || $crate::detect::check_for( -- cgit 1.4.1-3-g733a5 From e56de7344f45a3488589ff56da4d38c042b7d7ce Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 13 Feb 2019 22:05:49 +0100 Subject: Fix wasm32 build job --- library/stdarch/ci/run.sh | 2 +- library/stdarch/crates/core_arch/Cargo.toml | 4 ---- library/stdarch/crates/std_detect/src/lib.rs | 2 ++ 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/ci/run.sh b/library/stdarch/ci/run.sh index 0d45feb846c..bcf9f389090 100755 --- a/library/stdarch/ci/run.sh +++ b/library/stdarch/ci/run.sh @@ -80,7 +80,7 @@ case ${TARGET} in esac -if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then +if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ] && [ "$TARGET" != "wasm32-unknown-unknown" ]; then # Test examples ( cd examples diff --git a/library/stdarch/crates/core_arch/Cargo.toml b/library/stdarch/crates/core_arch/Cargo.toml index 56140e0c349..e2e6d62a5e0 100644 --- a/library/stdarch/crates/core_arch/Cargo.toml +++ b/library/stdarch/crates/core_arch/Cargo.toml @@ -30,9 +30,5 @@ std_detect = { version = "0.*", path = "../std_detect" } [target.wasm32-unknown-unknown.dev-dependencies] wasm-bindgen-test = "=0.2.19" -[features] -# Internal-usage only: denies all warnings. 
-strict = [] - [package.metadata.docs.rs] rustdoc-args = [ "--cfg", "dox" ] \ No newline at end of file diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index f5278f964a9..da54cc73592 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -20,6 +20,8 @@ )] #![cfg_attr(target_os = "linux", feature(linkage))] #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] +#![cfg_attr(stdsimd_strict, deny(warnings))] +#![cfg_attr(test, allow(unused_imports))] #![no_std] #[macro_use] -- cgit 1.4.1-3-g733a5 From 049071b50f327d28145356f4ddf89470e3b244f7 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 14 Feb 2019 21:52:41 +0100 Subject: Try to fix upstream --- library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/arm.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs index 3270641eb34..ebae2bd2854 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -3,7 +3,7 @@ /// Checks if `aarch64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_aarch64_feature_detected { ("neon") => { // FIXME: this should be removed once we rename Aarch64 neon to asimd diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs index ebed1f757e3..b2626bf2923 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -3,7 +3,7 @@ /// Checks if `arm` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_arm_feature_detected { ("neon") => { cfg!(target_feature = "neon") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs index 969d6d05dc5..f4381b811cd 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -3,7 +3,7 @@ /// Checks if `mips` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! 
is_mips_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs index d421595c9cd..2663bc68ba9 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -3,7 +3,7 @@ /// Checks if `mips64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_mips64_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs index 9d65437e0ba..a342dc1aacc 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_powerpc_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs index 4514100907c..2e82c569252 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_powerpc64_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 953bf29d680..6a3e11de3df 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -79,7 +79,7 @@ /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] #[stable(feature = "simd_x86", since = "1.27.0")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_x86_feature_detected { ("aes") => { cfg!(target_feature = "aes") || $crate::detect::check_for( -- cgit 1.4.1-3-g733a5 From a16a30d796f6753992ef1e5c877e5d456a799d19 Mon Sep 17 00:00:00 2001 From: Alexander Regueiro Date: Thu, 31 Jan 2019 00:36:36 +0000 Subject: Various cosmetic improvements. 
--- .../stdarch/crates/core_arch/src/wasm32/simd128.rs | 79 ++--- library/stdarch/crates/core_arch/src/x86/adx.rs | 12 +- library/stdarch/crates/core_arch/src/x86/aes.rs | 10 +- library/stdarch/crates/core_arch/src/x86/avx.rs | 356 ++++++++++--------- library/stdarch/crates/core_arch/src/x86/avx2.rs | 384 ++++++++++----------- .../stdarch/crates/core_arch/src/x86/avx512f.rs | 8 +- library/stdarch/crates/core_arch/src/x86/bmi1.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bmi2.rs | 2 +- library/stdarch/crates/core_arch/src/x86/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86/fma.rs | 68 ++-- library/stdarch/crates/core_arch/src/x86/mmx.rs | 52 +-- .../stdarch/crates/core_arch/src/x86/pclmulqdq.rs | 2 +- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sha.rs | 12 +- library/stdarch/crates/core_arch/src/x86/sse.rs | 172 ++++----- library/stdarch/crates/core_arch/src/x86/sse2.rs | 370 ++++++++++---------- library/stdarch/crates/core_arch/src/x86/sse3.rs | 10 +- library/stdarch/crates/core_arch/src/x86/sse41.rs | 72 ++-- library/stdarch/crates/core_arch/src/x86/sse42.rs | 38 +- library/stdarch/crates/core_arch/src/x86/ssse3.rs | 60 ++-- library/stdarch/crates/core_arch/src/x86/xsave.rs | 14 +- library/stdarch/crates/core_arch/src/x86_64/adx.rs | 12 +- library/stdarch/crates/core_arch/src/x86_64/avx.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/avx2.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/bmi.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/bmi2.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- .../crates/core_arch/src/x86_64/cmpxchg16b.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/rdrand.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/sse.rs | 6 +- .../stdarch/crates/core_arch/src/x86_64/sse2.rs | 16 +- .../stdarch/crates/core_arch/src/x86_64/sse41.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/xsave.rs | 12 +- .../crates/std_detect/src/detect/arch/x86.rs | 7 +- .../stdarch/crates/std_detect/src/detect/cache.rs | 2 +- .../std_detect/src/detect/os/linux/auxvec.rs | 2 +- library/stdarch/crates/stdsimd-test/src/lib.rs | 29 +- 37 files changed, 916 insertions(+), 921 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs index 591d8062ca5..b742f12e185 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -18,8 +18,9 @@ use stdsimd_test::assert_instr; use wasm_bindgen_test::wasm_bindgen_test; types! { - /// WASM-specific 128-bit wide SIMD vector type - pub struct v128(i32, i32, i32, i32); // NB: internals here are arbitrary + /// WASM-specific 128-bit wide SIMD vector type. + // N.B., internals here are arbitrary. + pub struct v128(i32, i32, i32, i32); } #[allow(non_camel_case_types)] @@ -144,21 +145,21 @@ extern "C" { fn llvm_bitselect(a: i8x16, b: i8x16, c: i8x16) -> i8x16; } -/// Load a `v128` vector from the given heap address. +/// Loads a `v128` vector from the given heap address. #[inline] #[cfg_attr(test, assert_instr(v128.load))] pub unsafe fn v128_load(m: *const v128) -> v128 { ptr::read(m) } -/// Store a `v128` vector to the given heap address. +/// Stores a `v128` vector to the given heap address. 
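Since the hunk above defines the `v128` load/store intrinsics, a tiny usage sketch may help. It assumes the `core::arch::wasm32` module path and the signatures shown in this diff (the API was unstable at the time), and it is only meaningful on a wasm32 target with SIMD enabled.

    // Round-trip a `v128` value through linear memory with the intrinsics
    // defined above (illustrative sketch, not part of the patch).
    #[cfg(target_arch = "wasm32")]
    mod v128_example {
        use core::arch::wasm32::{v128, v128_load, v128_store};

        /// Copies one 128-bit SIMD slot to another.
        pub unsafe fn copy(src: *const v128, dst: *mut v128) {
            let value = v128_load(src); // lowers to `v128.load`
            v128_store(dst, value);     // lowers to `v128.store`
        }
    }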
#[inline] #[cfg_attr(test, assert_instr(v128.store))] pub unsafe fn v128_store(m: *mut v128, a: v128) { ptr::write(m, a) } -/// Materialize a constant SIMD value from the immediate operands. +/// Materializes a constant SIMD value from the immediate operands. /// /// The `v128.const` instruction is encoded with 16 immediate bytes /// `imm` which provide the bits of the vector directly. @@ -215,18 +216,18 @@ pub const fn v128_const( } } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// -/// Construct a vector with `x` replicated to all 16 lanes. +/// Constructs a vector with `x` replicated to all 16 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i8x16_splat(a: i8) -> v128 { unsafe { mem::transmute(i8x16::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -249,9 +250,9 @@ pub unsafe fn i8x16_extract_lane(a: v128, imm: usize) -> i8 { simd_extract(a.as_i8x16(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -265,7 +266,7 @@ pub unsafe fn i8x16_replace_lane(a: v128, imm: usize, val: i8) -> v128 { mem::transmute(simd_insert(a.as_i8x16(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// /// Construct a vector with `x` replicated to all 8 lanes. #[inline] @@ -274,9 +275,9 @@ pub fn i16x8_splat(a: i16) -> v128 { unsafe { mem::transmute(i16x8::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 8 packed i16 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts a the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -299,9 +300,9 @@ pub unsafe fn i16x8_extract_lane(a: v128, imm: usize) -> i16 { simd_extract(a.as_i16x8(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 8 packed i16 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -315,18 +316,18 @@ pub unsafe fn i16x8_replace_lane(a: v128, imm: usize, val: i16) -> v128 { mem::transmute(simd_insert(a.as_i16x8(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// -/// Construct a vector with `x` replicated to all 4 lanes. +/// Constructs a vector with `x` replicated to all 4 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i32x4_splat(a: i32) -> v128 { unsafe { mem::transmute(i32x4::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. 
/// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -340,9 +341,9 @@ pub unsafe fn i32x4_extract_lane(a: v128, imm: usize) -> i32 { simd_extract(a.as_i32x4(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -356,7 +357,7 @@ pub unsafe fn i32x4_replace_lane(a: v128, imm: usize, val: i32) -> v128 { mem::transmute(simd_insert(a.as_i32x4(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// /// Construct a vector with `x` replicated to all 2 lanes. #[inline] @@ -365,9 +366,9 @@ pub fn i64x2_splat(a: i64) -> v128 { unsafe { mem::transmute(i64x2::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 2 packed i64 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -381,9 +382,9 @@ pub unsafe fn i64x2_extract_lane(a: v128, imm: usize) -> i64 { simd_extract(a.as_i64x2(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 2 packed i64 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -397,18 +398,18 @@ pub unsafe fn i64x2_replace_lane(a: v128, imm: usize, val: i64) -> v128 { mem::transmute(simd_insert(a.as_i64x2(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// -/// Construct a vector with `x` replicated to all 4 lanes. +/// Constructs a vector with `x` replicated to all 4 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn f32x4_splat(a: f32) -> v128 { unsafe { mem::transmute(f32x4::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 4 packed f32 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -422,9 +423,9 @@ pub unsafe fn f32x4_extract_lane(a: v128, imm: usize) -> f32 { simd_extract(a.as_f32x4(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 4 packed f32 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -438,18 +439,18 @@ pub unsafe fn f32x4_replace_lane(a: v128, imm: usize, val: f32) -> v128 { mem::transmute(simd_insert(a.as_f32x4(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. 
/// -/// Construct a vector with `x` replicated to all 2 lanes. +/// Constructs a vector with `x` replicated to all 2 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn f64x2_splat(a: f64) -> v128 { unsafe { mem::transmute(f64x2::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 2 packed f64 numbers. +/// Extracts lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -463,9 +464,9 @@ pub unsafe fn f64x2_extract_lane(a: v128, imm: usize) -> f64 { simd_extract(a.as_f64x2(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 2 packed f64 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index 8d4c0508388..5800d27c18c 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -11,8 +11,8 @@ extern "unadjusted" { fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32); } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry flag), and store the unsigned 32-bit result in out, and the carry-out +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry flag), and store the unsigned 32-bit result in `out`, and the carry-out /// is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(adc))] @@ -23,8 +23,8 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { a } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 32-bit result in out, and +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[target_feature(enable = "adx")] @@ -36,8 +36,8 @@ pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { r } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 32-bit result in out, and +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(sbb))] diff --git a/library/stdarch/crates/core_arch/src/x86/aes.rs b/library/stdarch/crates/core_arch/src/x86/aes.rs index c90bb122d5a..2fc6dc67455 100644 --- a/library/stdarch/crates/core_arch/src/x86/aes.rs +++ b/library/stdarch/crates/core_arch/src/x86/aes.rs @@ -28,7 +28,7 @@ extern "C" { fn aeskeygenassist(a: __m128i, imm8: u8) -> __m128i; } -/// Perform one round of an AES decryption flow on data (state) in `a`. +/// Performs one round of an AES decryption flow on data (state) in `a`. 
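The adx.rs hunk above documents the carry-propagating adds. As a worked illustration (not from this patch), chaining `_addcarry_u32` adds two 64-bit values held as 32-bit halves, feeding the carry out of the low half into the carry in of the high half.

    // Illustrative sketch: 64-bit addition from two 32-bit limbs using the
    // carry chain described above. `a` and `b` are (low, high) pairs.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod carry_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        pub unsafe fn add64(a: (u32, u32), b: (u32, u32)) -> (u32, u32, u8) {
            let (mut lo, mut hi) = (0u32, 0u32);
            let carry = _addcarry_u32(0, a.0, b.0, &mut lo);     // low halves
            let carry = _addcarry_u32(carry, a.1, b.1, &mut hi); // high + carry
            (lo, hi, carry) // a final carry of 1 means the sum overflowed 64 bits
        }
    }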
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128) #[inline] @@ -39,7 +39,7 @@ pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdec(a, round_key) } -/// Perform the last round of an AES decryption flow on data (state) in `a`. +/// Performs the last round of an AES decryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128) #[inline] @@ -50,7 +50,7 @@ pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdeclast(a, round_key) } -/// Perform one round of an AES encryption flow on data (state) in `a`. +/// Performs one round of an AES encryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128) #[inline] @@ -61,7 +61,7 @@ pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenc(a, round_key) } -/// Perform the last round of an AES encryption flow on data (state) in `a`. +/// Performs the last round of an AES encryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128) #[inline] @@ -72,7 +72,7 @@ pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenclast(a, round_key) } -/// Perform the `InvMixColumns` transformation on `a`. +/// Performs the `InvMixColumns` transformation on `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index a26eb8f12b9..28e53c9e8bc 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -23,7 +23,7 @@ use ptr; #[cfg(test)] use stdsimd_test::assert_instr; -/// Add packed double-precision (64-bit) floating-point elements +/// Adds packed double-precision (64-bit) floating-point elements /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_pd) @@ -35,7 +35,7 @@ pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { simd_add(a, b) } -/// Add packed single-precision (32-bit) floating-point elements in `a` and +/// Adds packed single-precision (32-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ps) @@ -47,9 +47,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { simd_add(a, b) } -/// Compute the bitwise AND of a packed double-precision (64-bit) -/// floating-point elements -/// in `a` and `b`. +/// Computes the bitwise AND of a packed double-precision (64-bit) +/// floating-point elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_pd) #[inline] @@ -64,7 +63,7 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_and(a, b)) } -/// Compute the bitwise AND of packed single-precision (32-bit) floating-point +/// Computes the bitwise AND of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. 
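Putting the aes.rs intrinsics above to work: a sketch of one AES-128 block encryption, assuming the 11-entry round-key schedule has already been expanded (key expansion via `_mm_aeskeygenassist_si128` is omitted). This is an illustration, not code from the patch.

    // Illustrative sketch: encrypt one block with AES-NI given an expanded
    // AES-128 key schedule (11 round keys).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod aes_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[target_feature(enable = "aes")]
        pub unsafe fn encrypt_block(block: __m128i, keys: &[__m128i; 11]) -> __m128i {
            // Initial AddRoundKey, nine full rounds, then the final round.
            let mut state = _mm_xor_si128(block, keys[0]);
            for rk in &keys[1..10] {
                state = _mm_aesenc_si128(state, *rk);
            }
            _mm_aesenclast_si128(state, keys[10])
        }
    }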
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_ps) @@ -78,14 +77,14 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { mem::transmute(simd_and(a, b)) } -/// Compute the bitwise OR packed double-precision (64-bit) floating-point +/// Computes the bitwise OR packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: Should be 'vorpd' instuction. -// See https://github.com/rust-lang-nursery/stdsimd/issues/71 +// FIXME: should be `vorpd` instuction. +// See . #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { @@ -94,7 +93,7 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_or(a, b)) } -/// Compute the bitwise OR packed single-precision (32-bit) floating-point +/// Computes the bitwise OR packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_ps) @@ -108,7 +107,7 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { mem::transmute(simd_or(a, b)) } -/// Shuffle double-precision (64-bit) floating-point elements within 128-bit +/// Shuffles double-precision (64-bit) floating-point elements within 128-bit /// lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd) @@ -154,7 +153,7 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { } } -/// Shuffle single-precision (32-bit) floating-point elements in `a` within +/// Shuffles single-precision (32-bit) floating-point elements in `a` within /// 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps) @@ -217,14 +216,13 @@ pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { } } -/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point -/// elements in `a` -/// and then AND with `b`. +/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point +/// elements in `a`, and then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: Should be 'vandnpd' instruction. +// FIXME: should be `vandnpd` instruction. #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { @@ -233,7 +231,7 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b)) } -/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point +/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point /// elements in `a` /// and then AND with `b`. 
/// @@ -248,8 +246,8 @@ pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { mem::transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b)) } -/// Compare packed double-precision (64-bit) floating-point elements -/// in `a` and `b`, and return packed maximum values +/// Compares packed double-precision (64-bit) floating-point elements +/// in `a` and `b`, and returns packed maximum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_pd) #[inline] @@ -260,8 +258,8 @@ pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { maxpd256(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` -/// and `b`, and return packed maximum values +/// Compares packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and returns packed maximum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ps) #[inline] @@ -272,8 +270,8 @@ pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { maxps256(a, b) } -/// Compare packed double-precision (64-bit) floating-point elements -/// in `a` and `b`, and return packed minimum values +/// Compares packed double-precision (64-bit) floating-point elements +/// in `a` and `b`, and returns packed minimum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_pd) #[inline] @@ -284,8 +282,8 @@ pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { minpd256(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` -/// and `b`, and return packed minimum values +/// Compares packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and returns packed minimum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ps) #[inline] @@ -296,7 +294,7 @@ pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { minps256(a, b) } -/// Multiply packed double-precision (64-bit) floating-point elements +/// Multiplies packed double-precision (64-bit) floating-point elements /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pd) @@ -308,7 +306,7 @@ pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { simd_mul(a, b) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` and +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ps) @@ -320,7 +318,7 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { simd_mul(a, b) } -/// Alternatively add and subtract packed double-precision (64-bit) +/// Alternatively adds and subtracts packed double-precision (64-bit) /// floating-point elements in `a` to/from packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_pd) @@ -332,7 +330,7 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { addsubpd256(a, b) } -/// Alternatively add and subtract packed single-precision (32-bit) +/// Alternatively adds and subtracts packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. 
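The AVX arithmetic intrinsics above operate lane-wise on 256-bit vectors. A short sketch (not from the patch) of adding eight `f32`s at a time; `_mm256_loadu_ps` and `_mm256_storeu_ps` are the usual unaligned load/store intrinsics, which this excerpt does not show.

    // Illustrative sketch: lane-wise addition of two [f32; 8] arrays with AVX.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod avx_add_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[target_feature(enable = "avx")]
        pub unsafe fn add8(a: &[f32; 8], b: &[f32; 8]) -> [f32; 8] {
            let va = _mm256_loadu_ps(a.as_ptr());
            let vb = _mm256_loadu_ps(b.as_ptr());
            let sum = _mm256_add_ps(va, vb); // a[i] + b[i] for each lane
            let mut out = [0.0f32; 8];
            _mm256_storeu_ps(out.as_mut_ptr(), sum);
            out
        }
    }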
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_ps) @@ -344,7 +342,7 @@ pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { addsubps256(a, b) } -/// Subtract packed double-precision (64-bit) floating-point elements in `b` +/// Subtracts packed double-precision (64-bit) floating-point elements in `b` /// from packed elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_pd) @@ -356,7 +354,7 @@ pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { simd_sub(a, b) } -/// Subtract packed single-precision (32-bit) floating-point elements in `b` +/// Subtracts packed single-precision (32-bit) floating-point elements in `b` /// from packed elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ps) @@ -368,7 +366,7 @@ pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { simd_sub(a, b) } -/// Compute the division of each of the 8 packed 32-bit floating-point elements +/// Computes the division of each of the 8 packed 32-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ps) @@ -380,7 +378,7 @@ pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { simd_div(a, b) } -/// Compute the division of each of the 4 packed 64-bit floating-point elements +/// Computes the division of each of the 4 packed 64-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_pd) @@ -392,7 +390,7 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { simd_div(a, b) } -/// Round packed double-precision (64-bit) floating point elements in `a` +/// Rounds packed double-precision (64-bit) floating point elements in `a` /// according to the flag `b`. The value of `b` may be as follows: /// /// - `0x00`: Round to the nearest whole number. @@ -419,7 +417,7 @@ pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d { constify_imm8!(b, call) } -/// Round packed double-precision (64-bit) floating point elements in `a` +/// Rounds packed double-precision (64-bit) floating point elements in `a` /// toward positive infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_pd) @@ -431,7 +429,7 @@ pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { roundpd256(a, 0x02) } -/// Round packed double-precision (64-bit) floating point elements in `a` +/// Rounds packed double-precision (64-bit) floating point elements in `a` /// toward negative infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_pd) @@ -443,7 +441,7 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { roundpd256(a, 0x01) } -/// Round packed single-precision (32-bit) floating point elements in `a` +/// Rounds packed single-precision (32-bit) floating point elements in `a` /// according to the flag `b`. The value of `b` may be as follows: /// /// - `0x00`: Round to the nearest whole number. 
@@ -470,7 +468,7 @@ pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 { constify_imm8!(b, call) } -/// Round packed single-precision (32-bit) floating point elements in `a` +/// Rounds packed single-precision (32-bit) floating point elements in `a` /// toward positive infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_ps) @@ -482,7 +480,7 @@ pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { roundps256(a, 0x02) } -/// Round packed single-precision (32-bit) floating point elements in `a` +/// Rounds packed single-precision (32-bit) floating point elements in `a` /// toward negative infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_ps) @@ -494,7 +492,7 @@ pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { roundps256(a, 0x01) } -/// Return the square root of packed single-precision (32-bit) floating point +/// Returns the square root of packed single-precision (32-bit) floating point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ps) @@ -506,7 +504,7 @@ pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { sqrtps256(a) } -/// Return the square root of packed double-precision (64-bit) floating point +/// Returns the square root of packed double-precision (64-bit) floating point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_pd) @@ -518,7 +516,7 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { sqrtpd256(a) } -/// Blend packed double-precision (64-bit) floating-point elements from +/// Blends packed double-precision (64-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd) @@ -567,7 +565,7 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { } } -/// Blend packed single-precision (32-bit) floating-point elements from +/// Blends packed single-precision (32-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps) @@ -630,7 +628,7 @@ pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { } } -/// Blend packed double-precision (64-bit) floating-point elements from +/// Blends packed double-precision (64-bit) floating-point elements from /// `a` and `b` using `c` as a mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_pd) @@ -642,7 +640,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vblendvpd(a, b, c) } -/// Blend packed single-precision (32-bit) floating-point elements from +/// Blends packed single-precision (32-bit) floating-point elements from /// `a` and `b` using `c` as a mask. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_ps) @@ -654,7 +652,7 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vblendvps(a, b, c) } -/// Conditionally multiply the packed single-precision (32-bit) floating-point +/// Conditionally multiplies the packed single-precision (32-bit) floating-point /// elements in `a` and `b` using the high 4 bits in `imm8`, /// sum the four products, and conditionally return the sum /// using the low 4 bits of `imm8`. @@ -732,7 +730,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { vhsubps(a, b) } -/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point +/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_pd) @@ -747,7 +745,7 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_xor(a, b)) } -/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point +/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_ps) @@ -858,7 +856,7 @@ pub const _CMP_GT_OQ: i32 = 0x1e; #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_TRUE_US: i32 = 0x1f; -/// Compare packed double-precision (64-bit) floating-point +/// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -877,7 +875,7 @@ pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { constify_imm6!(imm8, call) } -/// Compare packed double-precision (64-bit) floating-point +/// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -896,7 +894,7 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { constify_imm6!(imm8, call) } -/// Compare packed single-precision (32-bit) floating-point +/// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -915,7 +913,7 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { constify_imm6!(imm8, call) } -/// Compare packed single-precision (32-bit) floating-point +/// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -934,10 +932,10 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { constify_imm6!(imm8, call) } -/// Compare the lower double-precision (64-bit) floating-point element in +/// Compares the lower double-precision (64-bit) floating-point element in /// `a` and `b` based on the comparison operand specified by `imm8`, /// store the result in the lower element of returned vector, -/// and copy the upper element from `a` to the upper element of returned +/// and copies the upper element from `a` to the upper element of returned /// vector. 
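The comparison predicates and `_mm256_cmp_ps` above pair naturally with the blend intrinsics from the previous hunk. Below is a sketch (not from the patch) of an element-wise maximum built from them, using the three-argument `imm8` form of `_mm256_cmp_ps` as it appears in this patch (later releases switched to a const-generic parameter).

    // Illustrative sketch: element-wise max via compare-then-blend.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod cmp_blend_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[target_feature(enable = "avx")]
        pub unsafe fn select_greater(a: __m256, b: __m256) -> __m256 {
            // All-ones lanes where a > b (ordered, non-signaling), else zero.
            let mask = _mm256_cmp_ps(a, b, _CMP_GT_OQ);
            // blendv takes the second operand where the mask lane is set.
            _mm256_blendv_ps(b, a, mask)
        }
    }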
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd) @@ -955,10 +953,10 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { constify_imm6!(imm8, call) } -/// Compare the lower single-precision (32-bit) floating-point element in +/// Compares the lower single-precision (32-bit) floating-point element in /// `a` and `b` based on the comparison operand specified by `imm8`, /// store the result in the lower element of returned vector, -/// and copy the upper 3 packed elements from `a` to the upper elements of +/// and copies the upper 3 packed elements from `a` to the upper elements of /// returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss) @@ -976,7 +974,7 @@ pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { constify_imm6!(imm8, call) } -/// Convert packed 32-bit integers in `a` to packed double-precision (64-bit) +/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_pd) @@ -988,7 +986,7 @@ pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { simd_cast(a.as_i32x4()) } -/// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) +/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ps) @@ -1000,7 +998,7 @@ pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { vcvtdq2ps(a.as_i32x8()) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` +/// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed single-precision (32-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ps) @@ -1012,7 +1010,7 @@ pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { vcvtpd2ps(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_epi32) @@ -1024,7 +1022,7 @@ pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { mem::transmute(vcvtps2dq(a)) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_pd) @@ -1036,7 +1034,7 @@ pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { simd_cast(a) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` +/// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttpd_epi32) @@ -1048,7 +1046,7 @@ pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { mem::transmute(vcvttpd2dq(a)) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` +/// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_epi32) @@ -1060,7 +1058,7 @@ pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { mem::transmute(vcvtpd2dq(a)) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttps_epi32) @@ -1072,7 +1070,7 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { mem::transmute(vcvttps2dq(a)) } -/// Extract 128 bits (composed of 4 packed single-precision (32-bit) +/// Extracts 128 bits (composed of 4 packed single-precision (32-bit) /// floating-point elements) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_ps) @@ -1091,7 +1089,7 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { } } -/// Extract 128 bits (composed of 2 packed double-precision (64-bit) +/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_pd) @@ -1110,7 +1108,7 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { } } -/// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`. +/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_si256) #[inline] @@ -1130,7 +1128,7 @@ pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { mem::transmute(dst) } -/// Zero the contents of all XMM or YMM registers. +/// Zeroes the contents of all XMM or YMM registers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroall) #[inline] @@ -1141,7 +1139,7 @@ pub unsafe fn _mm256_zeroall() { vzeroall() } -/// Zero the upper 128 bits of all YMM registers; +/// Zeroes the upper 128 bits of all YMM registers; /// the lower 128-bits of the registers are unmodified. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroupper) @@ -1153,7 +1151,7 @@ pub unsafe fn _mm256_zeroupper() { vzeroupper() } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_ps) @@ -1165,7 +1163,7 @@ pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { vpermilps256(a, b.as_i32x8()) } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_ps) @@ -1177,7 +1175,7 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { vpermilps(a, b.as_i32x4()) } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps) @@ -1235,7 +1233,7 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { } } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps) @@ -1289,7 +1287,7 @@ pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { } } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// within 256-bit lanes using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_pd) @@ -1301,7 +1299,7 @@ pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { vpermilpd256(a, b.as_i64x4()) } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_pd) @@ -1313,7 +1311,7 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { vpermilpd(a, b.as_i64x2()) } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd) @@ -1359,7 +1357,7 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d { } } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd) @@ -1389,7 +1387,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { } } -/// Shuffle 256-bits (composed of 8 packed single-precision (32-bit) +/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. 
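// A minimal usage sketch, assuming the `imm8: i32` signature shown in this patch:
// `_mm_permute_ps` with control `0b00_01_10_11` reverses the four 32-bit lanes of `a`.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn reverse_lanes(a: std::arch::x86_64::__m128) -> std::arch::x86_64::__m128 {
    use std::arch::x86_64::_mm_permute_ps;
    _mm_permute_ps(a, 0b00_01_10_11) // result lane i takes source lane (3 - i)
}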
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_ps) @@ -1407,7 +1405,7 @@ pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256, imm8: i32) -> __m256 constify_imm8!(imm8, call) } -/// Shuffle 256-bits (composed of 4 packed double-precision (64-bit) +/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_pd) @@ -1425,7 +1423,7 @@ pub unsafe fn _mm256_permute2f128_pd(a: __m256d, b: __m256d, imm8: i32) -> __m25 constify_imm8!(imm8, call) } -/// Shuffle 258-bits (composed of integer data) selected by `imm8` +/// Shuffles 256 bits (composed of integer data) selected by `imm8` /// from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_si256) @@ -1446,7 +1444,7 @@ pub unsafe fn _mm256_permute2f128_si256(a: __m256i, b: __m256i, imm8: i32) -> __ mem::transmute(r) } -/// Broadcast a single-precision (32-bit) floating-point element from memory +/// Broadcasts a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ss) @@ -1459,7 +1457,7 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } -/// Broadcast a single-precision (32-bit) floating-point element from memory +/// Broadcasts a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcast_ss) @@ -1472,7 +1470,7 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } -/// Broadcast a double-precision (64-bit) floating-point element from memory +/// Broadcasts a double-precision (64-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_sd) @@ -1485,7 +1483,7 @@ pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } -/// Broadcast 128 bits from memory (composed of 4 packed single-precision +/// Broadcasts 128 bits from memory (composed of 4 packed single-precision /// (32-bit) floating-point elements) to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ps) @@ -1497,7 +1495,7 @@ pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { vbroadcastf128ps256(a) } -/// Broadcast 128 bits from memory (composed of 2 packed double-precision +/// Broadcasts 128 bits from memory (composed of 2 packed double-precision /// (64-bit) floating-point elements) to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_pd) @@ -1509,7 +1507,7 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { vbroadcastf128pd256(a) } -/// Copy `a` to result, then insert 128 bits (composed of 4 packed +/// Copies `a` to result, then inserts 128 bits (composed of 4 packed /// single-precision (32-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`.
/// @@ -1530,7 +1528,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { } } -/// Copy `a` to result, then insert 128 bits (composed of 2 packed +/// Copies `a` to result, then inserts 128 bits (composed of 2 packed /// double-precision (64-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`. /// @@ -1550,7 +1548,7 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d } } -/// Copy `a` to result, then insert 128 bits from `b` into result +/// Copies `a` to result, then inserts 128 bits from `b` into result /// at the location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_si256) @@ -1571,7 +1569,7 @@ pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m2 mem::transmute(dst) } -/// Copy `a` to result, and insert the 8-bit integer `i` into result +/// Copies `a` to result, and inserts the 8-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi8) @@ -1584,7 +1582,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i)) } -/// Copy `a` to result, and insert the 16-bit integer `i` into result +/// Copies `a` to result, and inserts the 16-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi16) @@ -1597,7 +1595,7 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i)) } -/// Copy `a` to result, and insert the 32-bit integer `i` into result +/// Copies `a` to result, and inserts the 32-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi32) @@ -1610,7 +1608,7 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i)) } -/// Load 256-bits (composed of 4 packed double-precision (64-bit) +/// Loads 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. @@ -1625,7 +1623,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { *(mem_addr as *const __m256d) } -/// Store 256-bits (composed of 4 packed double-precision (64-bit) +/// Stores 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. @@ -1640,7 +1638,7 @@ pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { *(mem_addr as *mut __m256d) = a; } -/// Load 256-bits (composed of 8 packed single-precision (32-bit) +/// Loads 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. 
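// A minimal usage sketch, assuming an AVX-capable x86_64 target: round-trip
// eight `f32`s through the unaligned load/store intrinsics. The aligned
// `_mm256_load_ps`/`_mm256_store_ps` pair additionally requires a 32-byte
// aligned buffer (e.g. via `#[repr(align(32))]`); `_mm256_add_ps` is assumed
// from the same AVX module.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn double_in_place(data: &mut [f32; 8]) {
    use std::arch::x86_64::{_mm256_add_ps, _mm256_loadu_ps, _mm256_storeu_ps};
    let v = _mm256_loadu_ps(data.as_ptr());
    _mm256_storeu_ps(data.as_mut_ptr(), _mm256_add_ps(v, v)); // every element doubled
}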
@@ -1655,7 +1653,7 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { *(mem_addr as *const __m256) } -/// Store 256-bits (composed of 8 packed single-precision (32-bit) +/// Stores 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. @@ -1670,7 +1668,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { *(mem_addr as *mut __m256) = a; } -/// Load 256-bits (composed of 4 packed double-precision (64-bit) +/// Loads 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1689,7 +1687,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { dst } -/// Store 256-bits (composed of 4 packed double-precision (64-bit) +/// Stores 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1702,7 +1700,7 @@ pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { storeupd256(mem_addr, a); } -/// Load 256-bits (composed of 8 packed single-precision (32-bit) +/// Loads 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1721,7 +1719,7 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { dst } -/// Store 256-bits (composed of 8 packed single-precision (32-bit) +/// Stores 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1734,7 +1732,7 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { storeups256(mem_addr, a); } -/// Load 256-bits of integer data from memory into result. +/// Loads 256-bits of integer data from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// @@ -1747,7 +1745,7 @@ pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i { *mem_addr } -/// Store 256-bits of integer data from `a` into memory. +/// Stores 256-bits of integer data from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// @@ -1760,7 +1758,7 @@ pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) { *mem_addr = a; } -/// Load 256-bits of integer data from memory into result. +/// Loads 256-bits of integer data from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_si256) @@ -1778,7 +1776,7 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { dst } -/// Store 256-bits of integer data from `a` into memory. +/// Stores 256-bits of integer data from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_si256) @@ -1790,7 +1788,7 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { storeudq256(mem_addr as *mut i8, a.as_i8x32()); } -/// Load packed double-precision (64-bit) floating-point elements from memory +/// Loads packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// @@ -1803,7 +1801,7 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d maskloadpd256(mem_addr as *const i8, mask.as_i64x4()) } -/// Store packed double-precision (64-bit) floating-point elements from `a` +/// Stores packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_pd) @@ -1815,7 +1813,7 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a); } -/// Load packed double-precision (64-bit) floating-point elements from memory +/// Loads packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// @@ -1828,7 +1826,7 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d { maskloadpd(mem_addr as *const i8, mask.as_i64x2()) } -/// Store packed double-precision (64-bit) floating-point elements from `a` +/// Stores packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_pd) @@ -1840,7 +1838,7 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a); } -/// Load packed single-precision (32-bit) floating-point elements from memory +/// Loads packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// @@ -1853,7 +1851,7 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 maskloadps256(mem_addr as *const i8, mask.as_i32x8()) } -/// Store packed single-precision (32-bit) floating-point elements from `a` +/// Stores packed single-precision (32-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_ps) @@ -1865,7 +1863,7 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a); } -/// Load packed single-precision (32-bit) floating-point elements from memory +/// Loads packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). 
/// @@ -1878,7 +1876,7 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 { maskloadps(mem_addr as *const i8, mask.as_i32x4()) } -/// Store packed single-precision (32-bit) floating-point elements from `a` +/// Stores packed single-precision (32-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_ps) @@ -1891,7 +1889,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements -/// from `a`, and return the results. +/// from `a`, and returns the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movehdup_ps) #[inline] @@ -1903,7 +1901,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { } /// Duplicate even-indexed single-precision (32-bit) floating-point elements -/// from `a`, and return the results. +/// from `a`, and returns the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_moveldup_ps) #[inline] @@ -1915,7 +1913,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { } /// Duplicate even-indexed double-precision (64-bit) floating-point elements -/// from "a", and return the results. +/// from "a", and returns the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movedup_pd) #[inline] @@ -1926,7 +1924,7 @@ pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { simd_shuffle4(a, a, [0, 0, 2, 2]) } -/// Load 256-bits of integer data from unaligned memory into result. +/// Loads 256-bits of integer data from unaligned memory into result. /// This intrinsic may perform better than `_mm256_loadu_si256` when the /// data crosses a cache line boundary. /// @@ -1981,8 +1979,8 @@ pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { intrinsics::nontemporal_store(mem_addr as *mut __m256, a); } -/// Compute the approximate reciprocal of packed single-precision (32-bit) -/// floating-point elements in `a`, and return the results. The maximum +/// Computes the approximate reciprocal of packed single-precision (32-bit) +/// floating-point elements in `a`, and returns the results. The maximum /// relative error for this approximation is less than 1.5*2^-12. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ps) @@ -1994,8 +1992,8 @@ pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { vrcpps(a) } -/// Compute the approximate reciprocal square root of packed single-precision -/// (32-bit) floating-point elements in `a`, and return the results. +/// Computes the approximate reciprocal square root of packed single-precision +/// (32-bit) floating-point elements in `a`, and returns the results. /// The maximum relative error for this approximation is less than 1.5*2^-12. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ps) @@ -2007,7 +2005,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { vrsqrtps(a) } -/// Unpack and interleave double-precision (64-bit) floating-point elements +/// Unpacks and interleave double-precision (64-bit) floating-point elements /// from the high half of each 128-bit lane in `a` and `b`. 
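// A minimal usage sketch, assuming an AVX-capable x86_64 target: load only the
// first two of four `f64` lanes with `_mm256_maskload_pd`; lanes whose mask
// element has the high bit clear are zeroed and are not read from memory.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn load_first_two(data: *const f64) -> std::arch::x86_64::__m256d {
    use std::arch::x86_64::{_mm256_maskload_pd, _mm256_setr_epi64x};
    let mask = _mm256_setr_epi64x(-1, -1, 0, 0); // -1 has the high bit set => lane enabled
    _mm256_maskload_pd(data, mask)
}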
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_pd) @@ -2019,7 +2017,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [1, 5, 3, 7]) } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the high half of each 128-bit lane in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_ps) @@ -2031,7 +2029,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } -/// Unpack and interleave double-precision (64-bit) floating-point elements +/// Unpacks and interleave double-precision (64-bit) floating-point elements /// from the low half of each 128-bit lane in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_pd) @@ -2043,7 +2041,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [0, 4, 2, 6]) } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the low half of each 128-bit lane in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_ps) @@ -2055,9 +2053,9 @@ pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } -/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. -/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return the `ZF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_si256) @@ -2069,9 +2067,9 @@ pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { ptestz256(a.as_i64x4(), b.as_i64x4()) } -/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. -/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return the `CF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_si256) @@ -2083,9 +2081,9 @@ pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { ptestc256(a.as_i64x4(), b.as_i64x4()) } -/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. -/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. 
Return 1 if both the `ZF` and /// `CF` values are zero, otherwise return 0. /// @@ -2098,7 +2096,7 @@ pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } -/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2115,7 +2113,7 @@ pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { vtestzpd256(a, b) } -/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2132,7 +2130,7 @@ pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { vtestcpd256(a, b) } -/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2150,7 +2148,7 @@ pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { vtestnzcpd256(a, b) } -/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2167,7 +2165,7 @@ pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { vtestzpd(a, b) } -/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2184,7 +2182,7 @@ pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { vtestcpd(a, b) } -/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. 
Compute the bitwise @@ -2202,7 +2200,7 @@ pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { vtestnzcpd(a, b) } -/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2219,7 +2217,7 @@ pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { vtestzps256(a, b) } -/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2236,7 +2234,7 @@ pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { vtestcps256(a, b) } -/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2254,7 +2252,7 @@ pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { vtestnzcps256(a, b) } -/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2271,7 +2269,7 @@ pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { vtestzps(a, b) } -/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2288,7 +2286,7 @@ pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { vtestcps(a, b) } -/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2306,7 +2304,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { vtestnzcps(a, b) } -/// Set each bit of the returned mask based on the most significant bit of the +/// Sets each bit of the returned mask based on the most significant bit of the /// corresponding packed double-precision (64-bit) floating-point element in /// `a`. 
/// @@ -2319,7 +2317,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { movmskpd256(a) } -/// Set each bit of the returned mask based on the most significant bit of the +/// Sets each bit of the returned mask based on the most significant bit of the /// corresponding packed single-precision (32-bit) floating-point element in /// `a`. /// @@ -2332,7 +2330,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { movmskps256(a) } -/// Return vector of type __m256d with all elements set to zero. +/// Returns vector of type __m256d with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_pd) #[inline] @@ -2343,7 +2341,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d { _mm256_set1_pd(0.0) } -/// Return vector of type __m256 with all elements set to zero. +/// Returns vector of type __m256 with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ps) #[inline] @@ -2354,7 +2352,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 { _mm256_set1_ps(0.0) } -/// Return vector of type __m256i with all elements set to zero. +/// Returns vector of type __m256i with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_si256) #[inline] @@ -2365,7 +2363,7 @@ pub unsafe fn _mm256_setzero_si256() -> __m256i { _mm256_set1_epi8(0) } -/// Set packed double-precision (64-bit) floating-point elements in returned +/// Sets packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_pd) @@ -2378,7 +2376,7 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { _mm256_setr_pd(d, c, b, a) } -/// Set packed single-precision (32-bit) floating-point elements in returned +/// Sets packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ps) @@ -2399,7 +2397,7 @@ pub unsafe fn _mm256_set_ps( _mm256_setr_ps(h, g, f, e, d, c, b, a) } -/// Set packed 8-bit integers in returned vector with the supplied values in +/// Sets packed 8-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi8) @@ -2450,7 +2448,7 @@ pub unsafe fn _mm256_set_epi8( ) } -/// Set packed 16-bit integers in returned vector with the supplied values. +/// Sets packed 16-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi16) #[inline] @@ -2484,7 +2482,7 @@ pub unsafe fn _mm256_set_epi16( ) } -/// Set packed 32-bit integers in returned vector with the supplied values. +/// Sets packed 32-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi32) #[inline] @@ -2504,7 +2502,7 @@ pub unsafe fn _mm256_set_epi32( _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) } -/// Set packed 64-bit integers in returned vector with the supplied values. 
+/// Sets packed 64-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi64x) #[inline] @@ -2515,7 +2513,7 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { _mm256_setr_epi64x(d, c, b, a) } -/// Set packed double-precision (64-bit) floating-point elements in returned +/// Sets packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_pd) @@ -2527,7 +2525,7 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { __m256d(a, b, c, d) } -/// Set packed single-precision (32-bit) floating-point elements in returned +/// Sets packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ps) @@ -2548,7 +2546,7 @@ pub unsafe fn _mm256_setr_ps( __m256(a, b, c, d, e, f, g, h) } -/// Set packed 8-bit integers in returned vector with the supplied values in +/// Sets packed 8-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi8) @@ -2599,7 +2597,7 @@ pub unsafe fn _mm256_setr_epi8( )) } -/// Set packed 16-bit integers in returned vector with the supplied values in +/// Sets packed 16-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi16) @@ -2634,7 +2632,7 @@ pub unsafe fn _mm256_setr_epi16( )) } -/// Set packed 32-bit integers in returned vector with the supplied values in +/// Sets packed 32-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi32) @@ -2655,7 +2653,7 @@ pub unsafe fn _mm256_setr_epi32( mem::transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } -/// Set packed 64-bit integers in returned vector with the supplied values in +/// Sets packed 64-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi64x) @@ -2667,7 +2665,7 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { mem::transmute(i64x4::new(a, b, c, d)) } -/// Broadcast double-precision (64-bit) floating-point value `a` to all +/// Broadcasts double-precision (64-bit) floating-point value `a` to all /// elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pd) @@ -2679,7 +2677,7 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d { _mm256_setr_pd(a, a, a, a) } -/// Broadcast single-precision (32-bit) floating-point value `a` to all +/// Broadcasts single-precision (32-bit) floating-point value `a` to all /// elements of returned vector. 
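// A minimal usage sketch, assuming an AVX-capable x86_64 target: `_mm256_set_pd`
// takes its arguments from the highest lane down, while `_mm256_setr_pd` takes
// them in lane order, so both calls below build the vector [0.0, 1.0, 2.0, 3.0].
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn equivalent_constructors() -> (std::arch::x86_64::__m256d, std::arch::x86_64::__m256d) {
    use std::arch::x86_64::{_mm256_set_pd, _mm256_setr_pd};
    (_mm256_set_pd(3.0, 2.0, 1.0, 0.0), _mm256_setr_pd(0.0, 1.0, 2.0, 3.0))
}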
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ps) @@ -2691,7 +2689,7 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 { _mm256_setr_ps(a, a, a, a, a, a, a, a) } -/// Broadcast 8-bit integer `a` to all elements of returned vector. +/// Broadcasts 8-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastb`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi8) @@ -2711,7 +2709,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { ) } -/// Broadcast 16-bit integer `a` to all all elements of returned vector. +/// Broadcasts 16-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastw`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi16) @@ -2725,7 +2723,7 @@ pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i { _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } -/// Broadcast 32-bit integer `a` to all elements of returned vector. +/// Broadcasts 32-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastd`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi32) @@ -2737,7 +2735,7 @@ pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { _mm256_setr_epi32(a, a, a, a, a, a, a, a) } -/// Broadcast 64-bit integer `a` to all elements of returned vector. +/// Broadcasts 64-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastq`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi64x) @@ -2950,7 +2948,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) } -/// Return vector of type `__m256` with undefined elements. +/// Returns vector of type `__m256` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ps) #[inline] @@ -2962,7 +2960,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 { mem::MaybeUninit::<__m256>::uninitialized().into_initialized() } -/// Return vector of type `__m256d` with undefined elements. +/// Returns vector of type `__m256d` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_pd) #[inline] @@ -2974,7 +2972,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d { mem::MaybeUninit::<__m256d>::uninitialized().into_initialized() } -/// Return vector of type __m256i with undefined elements. +/// Returns vector of type __m256i with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_si256) #[inline] @@ -2986,7 +2984,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i { mem::MaybeUninit::<__m256i>::uninitialized().into_initialized() } -/// Set packed __m256 returned vector with the supplied values. +/// Sets packed __m256 returned vector with the supplied values.
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128) #[inline] @@ -2997,7 +2995,7 @@ pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Set packed __m256d returned vector with the supplied values. +/// Sets packed __m256d returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128d) #[inline] @@ -3010,7 +3008,7 @@ pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { mem::transmute(_mm256_set_m128(hi, lo)) } -/// Set packed __m256i returned vector with the supplied values. +/// Sets packed __m256i returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128i) #[inline] @@ -3023,7 +3021,7 @@ pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { mem::transmute(_mm256_set_m128(hi, lo)) } -/// Set packed __m256 returned vector with the supplied values. +/// Sets packed __m256 returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128) #[inline] @@ -3034,7 +3032,7 @@ pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { _mm256_set_m128(hi, lo) } -/// Set packed __m256d returned vector with the supplied values. +/// Sets packed __m256d returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128d) #[inline] @@ -3045,7 +3043,7 @@ pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { _mm256_set_m128d(hi, lo) } -/// Set packed __m256i returned vector with the supplied values. +/// Sets packed __m256i returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128i) #[inline] @@ -3056,7 +3054,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { _mm256_set_m128i(hi, lo) } -/// Load two 128-bit values (composed of 4 packed single-precision (32-bit) +/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit) /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3071,7 +3069,7 @@ pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m2 _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1) } -/// Load two 128-bit values (composed of 2 packed double-precision (64-bit) +/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3086,7 +3084,7 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1) } -/// Load two 128-bit values (composed of integer data) from memory, and combine +/// Loads two 128-bit values (composed of integer data) from memory, and combine /// them into a 256-bit value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. 
/// @@ -3100,7 +3098,7 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1) } -/// Store the high and low 128-bit halves (each composed of 4 packed +/// Stores the high and low 128-bit halves (each composed of 4 packed /// single-precision (32-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3117,7 +3115,7 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) _mm_storeu_ps(hiaddr, hi); } -/// Store the high and low 128-bit halves (each composed of 2 packed +/// Stores the high and low 128-bit halves (each composed of 2 packed /// double-precision (64-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3134,7 +3132,7 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256 _mm_storeu_pd(hiaddr, hi); } -/// Store the high and low 128-bit halves (each composed of integer data) from +/// Stores the high and low 128-bit halves (each composed of integer data) from /// `a` into memory two different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index 1043e96b95a..6a08d10a9ff 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -59,7 +59,7 @@ pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { mem::transmute(pabsb(a.as_i8x32())) } -/// Add packed 64-bit integers in `a` and `b`. +/// Adds packed 64-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64) #[inline] @@ -70,7 +70,7 @@ pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i64x4(), b.as_i64x4())) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32) #[inline] @@ -81,7 +81,7 @@ pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i32x8(), b.as_i32x8())) } -/// Add packed 16-bit integers in `a` and `b`. +/// Adds packed 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16) #[inline] @@ -92,7 +92,7 @@ pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i16x16(), b.as_i16x16())) } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8) #[inline] @@ -103,7 +103,7 @@ pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i8x32(), b.as_i8x32())) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. 
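// A minimal usage sketch, assuming an AVX2-capable x86_64 target: signed
// saturating addition with `_mm256_adds_epi8`; 100 + 100 saturates to the
// `i8` maximum of 127 in every lane instead of wrapping.
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
unsafe fn saturating_add_demo() -> std::arch::x86_64::__m256i {
    use std::arch::x86_64::{_mm256_adds_epi8, _mm256_set1_epi8};
    _mm256_adds_epi8(_mm256_set1_epi8(100), _mm256_set1_epi8(100)) // every lane is 127
}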
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8) #[inline] @@ -114,7 +114,7 @@ pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddsb(a.as_i8x32(), b.as_i8x32())) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16) #[inline] @@ -125,7 +125,7 @@ pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddsw(a.as_i16x16(), b.as_i16x16())) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8) #[inline] @@ -136,7 +136,7 @@ pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddusb(a.as_u8x32(), b.as_u8x32())) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16) #[inline] @@ -147,8 +147,8 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddusw(a.as_u16x16(), b.as_u16x16())) } -/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary -/// result, shift the result right by `n` bytes, and return the low 16 bytes. +/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary +/// result, shifts the result right by `n` bytes, and returns the low 16 bytes. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8) #[inline] @@ -158,12 +158,12 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { let n = n as u32; - // If palignr is shifting the pair of vectors more than the size of two + // If `palignr` is shifting the pair of vectors more than the size of two // lanes, emit zero. if n > 32 { return _mm256_set1_epi8(0); } - // If palignr is shifting the pair of input vectors more than one lane, + // If `palignr` is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. let (a, b, n) = if n > 16 { (_mm256_set1_epi8(0), a, n - 16) @@ -308,7 +308,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { mem::transmute(r) } -/// Compute the bitwise AND of 256 bits (representing integer data) +/// Computes the bitwise AND of 256 bits (representing integer data) /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256) @@ -320,7 +320,7 @@ pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_and(a.as_i64x4(), b.as_i64x4())) } -/// Compute the bitwise NOT of 256 bits (representing integer data) +/// Computes the bitwise NOT of 256 bits (representing integer data) /// in `a` and then AND with `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256) @@ -336,7 +336,7 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { )) } -/// Average packed unsigned 16-bit integers in `a` and `b`. +/// Averages packed unsigned 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16) #[inline] @@ -347,7 +347,7 @@ pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pavgw(a.as_u16x16(), b.as_u16x16())) } -/// Average packed unsigned 8-bit integers in `a` and `b`. +/// Averages packed unsigned 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8) #[inline] @@ -358,7 +358,7 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pavgb(a.as_u8x32(), b.as_u8x32())) } -/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. +/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32) #[inline] @@ -394,7 +394,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { mem::transmute(r) } -/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. +/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32) #[inline] @@ -459,7 +459,7 @@ pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Blend packed 16-bit integers from `a` and `b` using control mask `imm8`. +/// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16) #[inline] @@ -564,7 +564,7 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Blend packed 8-bit integers from `a` and `b` using `mask`. +/// Blends packed 8-bit integers from `a` and `b` using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8) #[inline] @@ -575,7 +575,7 @@ pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m25 mem::transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32())) } -/// Broadcast the low packed 8-bit integer from `a` to all elements of +/// Broadcasts the low packed 8-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8) @@ -589,7 +589,7 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { mem::transmute::(ret) } -/// Broadcast the low packed 8-bit integer from `a` to all elements of +/// Broadcasts the low packed 8-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8) @@ -603,9 +603,9 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { mem::transmute::(ret) } -// NB: simd_shuffle4 with integer data types for `a` and `b` is -// often compiled to vbroadcastss. 
-/// Broadcast the low packed 32-bit integer from `a` to all elements of +// N.B., `simd_shuffle4` with integer data types for `a` and `b` is +// often compiled to `vbroadcastss`. +/// Broadcasts the low packed 32-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32) @@ -619,9 +619,9 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { mem::transmute::(ret) } -// NB: simd_shuffle4 with integer data types for `a` and `b` is -// often compiled to vbroadcastss. -/// Broadcast the low packed 32-bit integer from `a` to all elements of +// N.B., `simd_shuffle4`` with integer data types for `a` and `b` is +// often compiled to `vbroadcastss`. +/// Broadcasts the low packed 32-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32) @@ -635,7 +635,7 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Broadcast the low packed 64-bit integer from `a` to all elements of +/// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64) @@ -649,9 +649,9 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { mem::transmute::(ret) } -// NB: simd_shuffle4 with integer data types for `a` and `b` is -// often compiled to vbroadcastsd. -/// Broadcast the low packed 64-bit integer from `a` to all elements of +// N.B. `simd_shuffle4` with integer data types for `a` and `b` is +// often compiled to `vbroadcastsd`. +/// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64) @@ -665,7 +665,7 @@ pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Broadcast the low double-precision (64-bit) floating-point element +/// Broadcasts the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd) @@ -677,7 +677,7 @@ pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2]) } -/// Broadcast the low double-precision (64-bit) floating-point element +/// Broadcasts the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd) @@ -689,9 +689,9 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4]) } -// NB: broadcastsi128_si256 is often compiled to vinsertf128 or -// vbroadcastf128. -/// Broadcast 128 bits of integer data from a to all 128-bit lanes in +// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or +// `vbroadcastf128`. +/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in /// the 256-bit returned value. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256) @@ -704,7 +704,7 @@ pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Broadcast the low single-precision (32-bit) floating-point element +/// Broadcasts the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps) @@ -716,7 +716,7 @@ pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4]) } -/// Broadcast the low single-precision (32-bit) floating-point element +/// Broadcasts the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps) @@ -728,7 +728,7 @@ pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8]) } -/// Broadcast the low packed 16-bit integer from a to all elements of +/// Broadcasts the low packed 16-bit integer from a to all elements of /// the 128-bit returned value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16) @@ -742,7 +742,7 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { mem::transmute::(ret) } -/// Broadcast the low packed 16-bit integer from a to all elements of +/// Broadcasts the low packed 16-bit integer from a to all elements of /// the 256-bit returned value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16) @@ -756,7 +756,7 @@ pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Compare packed 64-bit integers in `a` and `b` for equality. +/// Compares packed 64-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64) #[inline] @@ -767,7 +767,7 @@ pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) } -/// Compare packed 32-bit integers in `a` and `b` for equality. +/// Compares packed 32-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32) #[inline] @@ -778,7 +778,7 @@ pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 16-bit integers in `a` and `b` for equality. +/// Compares packed 16-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16) #[inline] @@ -789,7 +789,7 @@ pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 8-bit integers in `a` and `b` for equality. +/// Compares packed 8-bit integers in `a` and `b` for equality. 
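For the `_mm256_cmpeq_*` family above, equal lanes become all-ones and unequal lanes become zero; a short sketch with `_mm256_cmpeq_epi32` (values and the runtime check are illustrative, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
            let b = _mm256_setr_epi32(1, 0, 3, 0, 5, 0, 7, 0);
            // Equal lanes become all-ones (-1 as i32), unequal lanes become 0.
            let eq = _mm256_cmpeq_epi32(a, b);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, eq);
            assert_eq!(out, [-1, 0, -1, 0, -1, 0, -1, 0]);
        }
    }
}
```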
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8) #[inline] @@ -800,7 +800,7 @@ pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) } -/// Compare packed 64-bit integers in `a` and `b` for greater-than. +/// Compares packed 64-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64) #[inline] @@ -811,7 +811,7 @@ pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) } -/// Compare packed 32-bit integers in `a` and `b` for greater-than. +/// Compares packed 32-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32) #[inline] @@ -822,7 +822,7 @@ pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 16-bit integers in `a` and `b` for greater-than. +/// Compares packed 16-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16) #[inline] @@ -833,7 +833,7 @@ pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 8-bit integers in `a` and `b` for greater-than. +/// Compares packed 8-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8) #[inline] @@ -916,8 +916,8 @@ pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { mem::transmute::(simd_cast(v32)) } -/// Zero extend packed unsigned 16-bit integers in `a` to packed 32-bit -/// integers, and store the results in dst. +/// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit +/// integers, and stores the results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32) #[inline] @@ -992,7 +992,7 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { mem::transmute::(simd_cast(v32)) } -/// Extract 128 bits (of integer data) from `a` selected with `imm8`. +/// Extracts 128 bits (of integer data) from `a` selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256) #[inline] @@ -1013,7 +1013,7 @@ pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i { mem::transmute(dst) } -/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`. +/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16) #[inline] @@ -1024,7 +1024,7 @@ pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddw(a.as_i16x16(), b.as_i16x16())) } -/// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`. +/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`. 
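The horizontal adds above work per 128-bit lane; a sketch of `_mm256_hadd_epi32` showing the resulting element order (illustrative values; not part of the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
            let b = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
            // Adjacent pairs are summed within each 128-bit lane:
            // low lane  -> a0+a1, a2+a3, b0+b1, b2+b3
            // high lane -> a4+a5, a6+a7, b4+b5, b6+b7
            let r = _mm256_hadd_epi32(a, b);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [3, 7, 30, 70, 11, 15, 110, 150]);
        }
    }
}
```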
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32) #[inline] @@ -1035,7 +1035,7 @@ pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddd(a.as_i32x8(), b.as_i32x8())) } -/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b` +/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` /// using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16) @@ -1081,7 +1081,7 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1105,7 +1105,7 @@ pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1136,7 +1136,7 @@ pub unsafe fn _mm_mask_i32gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1160,7 +1160,7 @@ pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: __m256i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1191,7 +1191,7 @@ pub unsafe fn _mm256_mask_i32gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1214,7 +1214,7 @@ pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1242,7 +1242,7 @@ pub unsafe fn _mm_mask_i32gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1265,7 +1265,7 @@ pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: __m256i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
@@ -1293,7 +1293,7 @@ pub unsafe fn _mm256_mask_i32gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1317,7 +1317,7 @@ pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1348,7 +1348,7 @@ pub unsafe fn _mm_mask_i32gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1372,7 +1372,7 @@ pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1403,7 +1403,7 @@ pub unsafe fn _mm256_mask_i32gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1426,7 +1426,7 @@ pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1454,7 +1454,7 @@ pub unsafe fn _mm_mask_i32gather_pd( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1477,7 +1477,7 @@ pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1505,7 +1505,7 @@ pub unsafe fn _mm256_mask_i32gather_pd( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1529,7 +1529,7 @@ pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
@@ -1560,7 +1560,7 @@ pub unsafe fn _mm_mask_i64gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1584,7 +1584,7 @@ pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: __m256i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1615,7 +1615,7 @@ pub unsafe fn _mm256_mask_i64gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1638,7 +1638,7 @@ pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1666,7 +1666,7 @@ pub unsafe fn _mm_mask_i64gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1689,7 +1689,7 @@ pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: __m256i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1717,7 +1717,7 @@ pub unsafe fn _mm256_mask_i64gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1741,7 +1741,7 @@ pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1772,7 +1772,7 @@ pub unsafe fn _mm_mask_i64gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1796,7 +1796,7 @@ pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: __m256i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
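All of the gather intrinsics above follow the same pattern: a base pointer, packed offsets, and a byte `scale`. A hedged sketch using `_mm_i32gather_epi32` with the three-argument signature shown in this patch (more recent `std::arch` versions take the scale as a const generic instead; values and the wrapper are illustrative):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
            // With scale = 4 (the size of an i32), the offsets act as
            // element indices into `data`.
            let offsets = _mm_setr_epi32(0, 2, 4, 6);
            let r = _mm_i32gather_epi32(data.as_ptr(), offsets, 4);
            let mut out = [0i32; 4];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            assert_eq!(out, [10, 12, 14, 16]);
        }
    }
}
```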
@@ -1827,7 +1827,7 @@ pub unsafe fn _mm256_mask_i64gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1850,7 +1850,7 @@ pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1878,7 +1878,7 @@ pub unsafe fn _mm_mask_i64gather_pd( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1901,7 +1901,7 @@ pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: __m256i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1929,7 +1929,7 @@ pub unsafe fn _mm256_mask_i64gather_pd( constify_imm8!(scale, call) } -/// Copy `a` to `dst`, then insert 128 bits (of integer data) from `b` at the +/// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256) @@ -1951,7 +1951,7 @@ pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m2 mem::transmute(dst) } -/// Multiply packed signed 16-bit integers in `a` and `b`, producing +/// Multiplies packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs /// of intermediate 32-bit integers. /// @@ -1964,7 +1964,7 @@ pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) } -/// Vertically multiply each unsigned 8-bit integer from `a` with the +/// Vertically multiplies each unsigned 8-bit integer from `a` with the /// corresponding signed 8-bit integer from `b`, producing intermediate /// signed 16-bit integers. Horizontally add adjacent pairs of intermediate /// signed 16-bit integers @@ -1978,7 +1978,7 @@ pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) } -/// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// @@ -1991,7 +1991,7 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i mem::transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4())) } -/// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). 
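A small sketch of the masked-load behavior described above for `_mm_maskload_epi32`: lanes whose mask element has its high bit set are loaded, the rest are zeroed (values and the runtime check are illustrative):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let data: [i32; 4] = [10, 20, 30, 40];
            // -1 has its high bit set (load); 0 does not (zero the lane).
            let mask = _mm_setr_epi32(-1, 0, -1, 0);
            let r = _mm_maskload_epi32(data.as_ptr(), mask);
            let mut out = [0i32; 4];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            assert_eq!(out, [10, 0, 30, 0]);
        }
    }
}
```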
/// @@ -2004,7 +2004,7 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2 mem::transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8())) } -/// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// @@ -2017,7 +2017,7 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i mem::transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2())) } -/// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// @@ -2030,7 +2030,7 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2 mem::transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4())) } -/// Store packed 32-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2043,7 +2043,7 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4()) } -/// Store packed 32-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2056,7 +2056,7 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25 maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8()) } -/// Store packed 64-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2069,7 +2069,7 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2()) } -/// Store packed 64-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2082,7 +2082,7 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25 maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4()) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16) @@ -2094,7 +2094,7 @@ pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsw(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 32-bit integers in `a` and `b`, and return the packed +/// Compares packed 32-bit integers in `a` and `b`, and returns the packed /// maximum values. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32) @@ -2106,7 +2106,7 @@ pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsd(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 8-bit integers in `a` and `b`, and return the packed +/// Compares packed 8-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8) @@ -2118,7 +2118,7 @@ pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsb(a.as_i8x32(), b.as_i8x32())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16) @@ -2130,7 +2130,7 @@ pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxuw(a.as_u16x16(), b.as_u16x16())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32) @@ -2142,7 +2142,7 @@ pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxud(a.as_u32x8(), b.as_u32x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8) @@ -2154,7 +2154,7 @@ pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxub(a.as_u8x32(), b.as_u8x32())) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16) @@ -2166,7 +2166,7 @@ pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsw(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 32-bit integers in `a` and `b`, and return the packed +/// Compares packed 32-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32) @@ -2178,7 +2178,7 @@ pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsd(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 8-bit integers in `a` and `b`, and return the packed +/// Compares packed 8-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8) @@ -2190,7 +2190,7 @@ pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsb(a.as_i8x32(), b.as_i8x32())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns /// the packed minimum values. 
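For the packed minimum/maximum hunks above, a sketch contrasting `_mm256_max_epi32` and `_mm256_min_epi32` against zero (illustrative values, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(1, -2, 3, -4, 5, -6, 7, -8);
            let zero = _mm256_setzero_si256();
            // Lane-wise signed maximum and minimum.
            let hi = _mm256_max_epi32(a, zero);
            let lo = _mm256_min_epi32(a, zero);
            let (mut max_out, mut min_out) = ([0i32; 8], [0i32; 8]);
            _mm256_storeu_si256(max_out.as_mut_ptr() as *mut __m256i, hi);
            _mm256_storeu_si256(min_out.as_mut_ptr() as *mut __m256i, lo);
            assert_eq!(max_out, [1, 0, 3, 0, 5, 0, 7, 0]);
            assert_eq!(min_out, [0, -2, 0, -4, 0, -6, 0, -8]);
        }
    }
}
```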
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16) @@ -2202,7 +2202,7 @@ pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminuw(a.as_u16x16(), b.as_u16x16())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32) @@ -2214,7 +2214,7 @@ pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminud(a.as_u32x8(), b.as_u32x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8) @@ -2226,7 +2226,7 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminub(a.as_u8x32(), b.as_u8x32())) } -/// Create mask from the most significant bit of each 8-bit element in `a`, +/// Creates mask from the most significant bit of each 8-bit element in `a`, /// return the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8) @@ -2238,8 +2238,8 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { pmovmskb(a.as_i8x32()) } -/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned -/// 8-bit integers in `a` compared to those in `b`, and store the 16-bit +/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned +/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit /// results in dst. Eight SADs are performed for each 128-bit lane using one /// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is /// selected from `b` starting at on the offset specified in `imm8`. Eight @@ -2264,10 +2264,10 @@ pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i mem::transmute(r) } -/// Multiply the low 32-bit integers from each packed 64-bit element in +/// Multiplies the low 32-bit integers from each packed 64-bit element in /// `a` and `b` /// -/// Return the 64-bit results. +/// Returns the 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32) #[inline] @@ -2278,10 +2278,10 @@ pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmuldq(a.as_i32x8(), b.as_i32x8())) } -/// Multiply the low unsigned 32-bit integers from each packed 64-bit +/// Multiplies the low unsigned 32-bit integers from each packed 64-bit /// element in `a` and `b` /// -/// Return the unsigned 64-bit results. +/// Returns the unsigned 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32) #[inline] @@ -2292,7 +2292,7 @@ pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmuludq(a.as_u32x8(), b.as_u32x8())) } -/// Multiply the packed 16-bit integers in `a` and `b`, producing +/// Multiplies the packed 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. 
/// @@ -2305,7 +2305,7 @@ pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhw(a.as_i16x16(), b.as_i16x16())) } -/// Multiply the packed unsigned 16-bit integers in `a` and `b`, producing +/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. /// @@ -2318,8 +2318,8 @@ pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhuw(a.as_u16x16(), b.as_u16x16())) } -/// Multiply the packed 16-bit integers in `a` and `b`, producing -/// intermediate 32-bit integers, and return the low 16 bits of the +/// Multiplies the packed 16-bit integers in `a` and `b`, producing +/// intermediate 32-bit integers, and returns the low 16 bits of the /// intermediate integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16) @@ -2331,8 +2331,8 @@ pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) } -/// Multiply the packed 32-bit integers in `a` and `b`, producing -/// intermediate 64-bit integers, and return the low 16 bits of the +/// Multiplies the packed 32-bit integers in `a` and `b`, producing +/// intermediate 64-bit integers, and returns the low 16 bits of the /// intermediate integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32) @@ -2344,7 +2344,7 @@ pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) } -/// Multiply packed 16-bit integers in `a` and `b`, producing +/// Multiplies packed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Truncate each intermediate /// integer to the 18 most significant bits, round by adding 1, and /// return bits `[16:1]`. 
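To make the low/high split above concrete, a sketch contrasting `_mm256_mullo_epi16` and `_mm256_mulhi_epi16` on the same 32-bit product (illustrative values; not part of the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            // 1000 * 1000 = 1_000_000 = 0x000F_4240 as a 32-bit product.
            let a = _mm256_set1_epi16(1000);
            let lo = _mm256_mullo_epi16(a, a); // low 16 bits:  0x4240
            let hi = _mm256_mulhi_epi16(a, a); // high 16 bits: 0x000F
            let (mut lo_out, mut hi_out) = ([0i16; 16], [0i16; 16]);
            _mm256_storeu_si256(lo_out.as_mut_ptr() as *mut __m256i, lo);
            _mm256_storeu_si256(hi_out.as_mut_ptr() as *mut __m256i, hi);
            assert_eq!(lo_out, [0x4240; 16]);
            assert_eq!(hi_out, [0x000F; 16]);
        }
    }
}
```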
@@ -2358,7 +2358,7 @@ pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) } -/// Compute the bitwise OR of 256 bits (representing integer data) in `a` +/// Computes the bitwise OR of 256 bits (representing integer data) in `a` /// and `b` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256) @@ -2370,7 +2370,7 @@ pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16) @@ -2382,7 +2382,7 @@ pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32) @@ -2394,7 +2394,7 @@ pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16) @@ -2406,7 +2406,7 @@ pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32) @@ -2488,7 +2488,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Shuffle 128-bits of integer data selected by `imm8` from `a` and `b`. +/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256) #[inline] @@ -2507,7 +2507,7 @@ pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i, imm8: i32) -> __ mem::transmute(constify_imm8!(imm8, call)) } -/// Shuffle 64-bit floating-point elements in `a` across lanes using the +/// Shuffles 64-bit floating-point elements in `a` across lanes using the /// control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd) @@ -2562,7 +2562,7 @@ pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d { } } -/// Shuffle eight 32-bit foating-point elements in `a` across lanes using +/// Shuffles eight 32-bit foating-point elements in `a` across lanes using /// the corresponding 32-bit integer index in `idx`. 
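The cross-lane permute described above, `_mm256_permutevar8x32_ps`, picks each output element by the index stored in `idx`; a sketch that reverses a vector (illustrative values, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
            // Reverse the eight elements: output[i] = a[idx[i]].
            let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
            let r = _mm256_permutevar8x32_ps(a, idx);
            let mut out = [0.0f32; 8];
            _mm256_storeu_ps(out.as_mut_ptr(), r);
            assert_eq!(out, [7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0]);
        }
    }
}
```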
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps) @@ -2574,7 +2574,7 @@ pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { permps(a, idx.as_i32x8()) } -/// Compute the absolute differences of packed unsigned 8-bit integers in `a` +/// Computes the absolute differences of packed unsigned 8-bit integers in `a` /// and `b`, then horizontally sum each consecutive 8 differences to /// produce four unsigned 16-bit integers, and pack these unsigned 16-bit /// integers in the low 16 bits of the 64-bit return value @@ -2588,7 +2588,7 @@ pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psadbw(a.as_u8x32(), b.as_u8x32())) } -/// Shuffle bytes from `a` according to the content of `b`. +/// Shuffles bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses into the 32 bytes /// of `a`. @@ -2627,7 +2627,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pshufb(a.as_u8x32(), b.as_u8x32())) } -/// Shuffle 32-bit integers in 128-bit lanes of `a` using the control in +/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in /// `imm8`. /// /// ```rust @@ -2730,7 +2730,7 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of `a` using +/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied /// to the output. /// @@ -2791,7 +2791,7 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of `a` using +/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied /// to the output. /// @@ -2852,8 +2852,8 @@ pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Negate packed 16-bit integers in `a` when the corresponding signed -/// 16-bit integer in `b` is negative, and return the results. +/// Negates packed 16-bit integers in `a` when the corresponding signed +/// 16-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16) @@ -2865,8 +2865,8 @@ pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignw(a.as_i16x16(), b.as_i16x16())) } -/// Negate packed 32-bit integers in `a` when the corresponding signed -/// 32-bit integer in `b` is negative, and return the results. +/// Negates packed 32-bit integers in `a` when the corresponding signed +/// 32-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32) @@ -2878,8 +2878,8 @@ pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignd(a.as_i32x8(), b.as_i32x8())) } -/// Negate packed 8-bit integers in `a` when the corresponding signed -/// 8-bit integer in `b` is negative, and return the results. 
+/// Negates packed 8-bit integers in `a` when the corresponding signed +/// 8-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8) @@ -2891,8 +2891,8 @@ pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignb(a.as_i8x32(), b.as_i8x32())) } -/// Shift packed 16-bit integers in `a` left by `count` while -/// shifting in zeros, and return the result +/// Shifts packed 16-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16) #[inline] @@ -2903,8 +2903,8 @@ pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psllw(a.as_i16x16(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` left by `count` while -/// shifting in zeros, and return the result +/// Shifts packed 32-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32) #[inline] @@ -2915,8 +2915,8 @@ pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(pslld(a.as_i32x8(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` left by `count` while -/// shifting in zeros, and return the result +/// Shifts packed 64-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64) #[inline] @@ -2927,7 +2927,7 @@ pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psllq(a.as_i64x4(), count.as_i64x2())) } -/// Shift packed 16-bit integers in `a` left by `imm8` while +/// Shifts packed 16-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16) @@ -2939,7 +2939,7 @@ pub unsafe fn _mm256_slli_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(pslliw(a.as_i16x16(), imm8)) } -/// Shift packed 32-bit integers in `a` left by `imm8` while +/// Shifts packed 32-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32) @@ -2951,7 +2951,7 @@ pub unsafe fn _mm256_slli_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psllid(a.as_i32x8(), imm8)) } -/// Shift packed 64-bit integers in `a` left by `imm8` while +/// Shifts packed 64-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64) @@ -2963,7 +2963,7 @@ pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(pslliq(a.as_i64x4(), imm8)) } -/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. 
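The `_mm256_sll_*` forms above take the shift count from the low 64 bits of a `__m128i` rather than from an immediate; a small sketch (values, wrapper, and runtime check are illustrative):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_set1_epi32(3);
            // Shift every 32-bit lane left by 4 bits: 3 << 4 == 48.
            let count = _mm_cvtsi32_si128(4);
            let r = _mm256_sll_epi32(a, count);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [48; 8]);
        }
    }
}
```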
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256) #[inline] @@ -2981,7 +2981,7 @@ pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128) #[inline] @@ -2999,9 +2999,9 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift packed 32-bit integers in `a` left by the amount +/// Shifts packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32) #[inline] @@ -3012,9 +3012,9 @@ pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 32-bit integers in `a` left by the amount +/// Shifts packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32) #[inline] @@ -3025,9 +3025,9 @@ pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } -/// Shift packed 64-bit integers in `a` left by the amount +/// Shifts packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64) #[inline] @@ -3038,9 +3038,9 @@ pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } -/// Shift packed 64-bit integers in `a` left by the amount +/// Shifts packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64) #[inline] @@ -3051,7 +3051,7 @@ pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } -/// Shift packed 16-bit integers in `a` right by `count` while +/// Shifts packed 16-bit integers in `a` right by `count` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16) @@ -3063,7 +3063,7 @@ pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psraw(a.as_i16x16(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `count` while +/// Shifts packed 32-bit integers in `a` right by `count` while /// shifting in sign bits. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32) @@ -3075,7 +3075,7 @@ pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrad(a.as_i32x8(), count.as_i32x4())) } -/// Shift packed 16-bit integers in `a` right by `imm8` while +/// Shifts packed 16-bit integers in `a` right by `imm8` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16) @@ -3087,7 +3087,7 @@ pub unsafe fn _mm256_srai_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psraiw(a.as_i16x16(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `imm8` while +/// Shifts packed 32-bit integers in `a` right by `imm8` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32) @@ -3099,7 +3099,7 @@ pub unsafe fn _mm256_srai_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psraid(a.as_i32x8(), imm8)) } -/// Shift packed 32-bit integers in `a` right by the amount specified by the +/// Shifts packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32) @@ -3111,7 +3111,7 @@ pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psravd(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 32-bit integers in `a` right by the amount specified by the +/// Shifts packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32) @@ -3123,7 +3123,7 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } -/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256) #[inline] @@ -3141,7 +3141,7 @@ pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128) #[inline] @@ -3159,7 +3159,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift packed 16-bit integers in `a` right by `count` while shifting in +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16) @@ -3171,7 +3171,7 @@ pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `count` while shifting in +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in /// zeros. 
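The difference between the arithmetic (`sra*`, sign bits shifted in) and logical (`srl*`, zeros shifted in) right shifts above is easiest to see on a negative lane; a sketch using the per-element variable-shift forms (illustrative values, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_set1_epi32(-16);
            let count = _mm256_set1_epi32(2);
            // Arithmetic shift keeps the sign: -16 >> 2 == -4.
            let sra = _mm256_srav_epi32(a, count);
            // Logical shift fills with zeros: 0xFFFF_FFF0 >> 2 == 0x3FFF_FFFC.
            let srl = _mm256_srlv_epi32(a, count);
            let (mut sra_out, mut srl_out) = ([0i32; 8], [0i32; 8]);
            _mm256_storeu_si256(sra_out.as_mut_ptr() as *mut __m256i, sra);
            _mm256_storeu_si256(srl_out.as_mut_ptr() as *mut __m256i, srl);
            assert_eq!(sra_out, [-4; 8]);
            assert_eq!(srl_out, [0x3FFF_FFFC; 8]);
        }
    }
}
```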
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32) @@ -3183,7 +3183,7 @@ pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrld(a.as_i32x8(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` right by `count` while shifting in +/// Shifts packed 64-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64) @@ -3195,7 +3195,7 @@ pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } -/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16) @@ -3207,7 +3207,7 @@ pub unsafe fn _mm256_srli_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrliw(a.as_i16x16(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32) @@ -3219,7 +3219,7 @@ pub unsafe fn _mm256_srli_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrlid(a.as_i32x8(), imm8)) } -/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64) @@ -3231,7 +3231,7 @@ pub unsafe fn _mm256_srli_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrliq(a.as_i64x4(), imm8)) } -/// Shift packed 32-bit integers in `a` right by the amount specified by +/// Shifts packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32) @@ -3243,7 +3243,7 @@ pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 32-bit integers in `a` right by the amount specified by +/// Shifts packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32) @@ -3255,7 +3255,7 @@ pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } -/// Shift packed 64-bit integers in `a` right by the amount specified by +/// Shifts packed 64-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64) @@ -3267,7 +3267,7 @@ pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } -/// Shift packed 64-bit integers in `a` right by the amount specified by +/// Shifts packed 64-bit integers in `a` right by the amount specified by /// the 
corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64) @@ -3373,7 +3373,7 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psubusb(a.as_u8x32(), b.as_u8x32())) } -/// Unpack and interleave 8-bit integers from the high half of each +/// Unpacks and interleave 8-bit integers from the high half of each /// 128-bit lane in `a` and `b`. /// /// ```rust @@ -3427,7 +3427,7 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 8-bit integers from the low half of each +/// Unpacks and interleave 8-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3480,7 +3480,7 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 16-bit integers from the high half of each +/// Unpacks and interleave 16-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3527,7 +3527,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 16-bit integers from the low half of each +/// Unpacks and interleave 16-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3575,7 +3575,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 32-bit integers from the high half of each +/// Unpacks and interleave 32-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3612,7 +3612,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 32-bit integers from the low half of each +/// Unpacks and interleave 32-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3649,7 +3649,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 64-bit integers from the high half of each +/// Unpacks and interleave 64-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3686,7 +3686,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 64-bit integers from the low half of each +/// Unpacks and interleave 64-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3723,7 +3723,7 @@ pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Compute the bitwise XOR of 256 bits (representing integer data) +/// Computes the bitwise XOR of 256 bits (representing integer data) /// in `a` and `b` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256) @@ -3735,7 +3735,7 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } -/// Extract an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. 
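The unpack intrinsics above interleave within each 128-bit lane rather than across the full 256-bit vector; a sketch with `_mm256_unpacklo_epi32` (illustrative values, not part of the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
            let b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
            // Low halves of each 128-bit lane are interleaved:
            // lane 0 -> a0, b0, a1, b1   lane 1 -> a4, b4, a5, b5
            let r = _mm256_unpacklo_epi32(a, b);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [0, 10, 1, 11, 4, 14, 5, 15]);
        }
    }
}
```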
@@ -3751,7 +3751,7 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 { simd_extract(a.as_i8x32(), imm8) } -/// Extract a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. @@ -3767,7 +3767,7 @@ pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 { simd_extract(a.as_i16x16(), imm8) } -/// Extract a 32-bit integer from `a`, selected with `imm8`. +/// Extracts a 32-bit integer from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 166ca179a52..4e04f0b5079 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -21,7 +21,7 @@ pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { mem::transmute(simd_select(cmp, a, sub)) } -/// Compute the absolute value of packed 32-bit integers in `a`, and store the +/// Computes the absolute value of packed 32-bit integers in `a`, and store the /// unsigned results in `dst` using writemask `k` (elements are copied from /// `src` when the corresponding mask bit is not set). /// @@ -34,7 +34,7 @@ pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _ mem::transmute(simd_select_bitmask(k, abs, src.as_i32x16())) } -/// Compute the absolute value of packed 32-bit integers in `a`, and store the +/// Computes the absolute value of packed 32-bit integers in `a`, and store the /// unsigned results in `dst` using zeromask `k` (elements are zeroed out when /// the corresponding mask bit is not set). /// @@ -48,7 +48,7 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { mem::transmute(simd_select_bitmask(k, abs, zero)) } -/// Return vector of type `__m512i` with all elements set to zero. +/// Returns vector of type `__m512i` with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512) #[inline] @@ -59,7 +59,7 @@ pub unsafe fn _mm512_setzero_si512() -> __m512i { mem::zeroed() } -/// Set packed 32-bit integers in `dst` with the supplied values in reverse +/// Sets packed 32-bit integers in `dst` with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "avx512f")] diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs index ece4d76dc3d..47a62d1f64b 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi1.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -50,7 +50,7 @@ pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } -/// Extract lowest set isolated bit. +/// Extracts lowest set isolated bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32) #[inline] @@ -61,7 +61,7 @@ pub unsafe fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } -/// Get mask up to lowest set bit. +/// Gets mask up to lowest set bit. 
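For the BMI1 helpers above, a scalar sketch of `_blsi_u32` (isolate the lowest set bit) and `_blsmsk_u32` (mask up to and including the lowest set bit); the input value and the runtime `bmi1` check are illustrative:

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("bmi1") {
        unsafe {
            let x: u32 = 0b1011_0000;
            // Lowest set bit isolated: 0b0001_0000.
            assert_eq!(_blsi_u32(x), 0b0001_0000);
            // All bits up to and including the lowest set bit: 0b0001_1111.
            assert_eq!(_blsmsk_u32(x), 0b0001_1111);
        }
    }
}
```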
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/bmi2.rs b/library/stdarch/crates/core_arch/src/x86/bmi2.rs index 7215e5a41d2..d3b9434a458 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi2.rs @@ -31,7 +31,7 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { result as u32 } -/// Zero higher bits of `a` >= `index`. +/// Zeroes higher bits of `a` >= `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs index ee6d6615b14..7d51082ff49 100644 --- a/library/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -5,7 +5,7 @@ #[cfg(test)] use stdsimd_test::assert_instr; -/// Return an integer with the reversed byte order of x +/// Returns an integer with the reversed byte order of x /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs index 4915c44f574..f88abf115de 100644 --- a/library/stdarch/crates/core_arch/src/x86/fma.rs +++ b/library/stdarch/crates/core_arch/src/x86/fma.rs @@ -23,7 +23,7 @@ use core_arch::x86::*; #[cfg(test)] use stdsimd_test::assert_instr; -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pd) @@ -35,7 +35,7 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pd) @@ -47,7 +47,7 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmaddpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ps) @@ -59,7 +59,7 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. 
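For the bit-manipulation helpers above, a rough usage sketch with run-time detection; it assumes an x86_64 target, the current `std::arch::x86_64` names, and arbitrary input values:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::{_blsi_u32, _blsmsk_u32, _bswap, _bzhi_u32};

    let x: u32 = 0b1011_0100;
    if is_x86_feature_detected!("bmi1") {
        unsafe {
            assert_eq!(_blsi_u32(x), 0b0000_0100);   // isolate lowest set bit
            assert_eq!(_blsmsk_u32(x), 0b0000_0111); // mask up to lowest set bit
        }
    }
    if is_x86_feature_detected!("bmi2") {
        unsafe {
            assert_eq!(_bzhi_u32(x, 4), 0b0100);     // bits at index >= 4 zeroed
        }
    }
    // Byte swapping needs no run-time check.
    unsafe {
        assert_eq!(_bswap(0x1122_3344), 0x4433_2211);
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```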
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ps) @@ -71,9 +71,9 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmaddps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and add the intermediate result to the lower element in `c`. -/// Store the result in the lower element of the returned value, and copy the +/// Stores the result in the lower element of the returned value, and copy the /// upper element from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sd) @@ -85,9 +85,9 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and add the intermediate result to the lower element in `c`. -/// Store the result in the lower element of the returned value, and copy the +/// Stores the result in the lower element of the returned value, and copy the /// 3 upper elements from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ss) @@ -99,7 +99,7 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddss(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -112,7 +112,7 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddsubpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -125,7 +125,7 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d vfmaddsubpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -138,7 +138,7 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddsubps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -151,7 +151,7 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmaddsubps256(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. 
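A fused multiply-add computes `a * b + c` with a single rounding step, which is what distinguishes it from a separate multiply followed by an add. A hedged sketch of `_mm_fmadd_ps` behind run-time detection, with a scalar `mul_add` fallback; the load/store helpers and the chosen values are illustrative only:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    let (a, b, c) = ([1.0f32, 2.0, 3.0, 4.0], [10.0f32; 4], [0.5f32; 4]);
    let mut out = [0.0f32; 4];

    if is_x86_feature_detected!("fma") {
        unsafe {
            use std::arch::x86_64::*;
            let va = _mm_loadu_ps(a.as_ptr());
            let vb = _mm_loadu_ps(b.as_ptr());
            let vc = _mm_loadu_ps(c.as_ptr());
            // out[i] = a[i] * b[i] + c[i], with a single rounding step.
            _mm_storeu_ps(out.as_mut_ptr(), _mm_fmadd_ps(va, vb, vc));
        }
    } else {
        // Scalar fallback with the same single-rounding behaviour.
        for i in 0..4 {
            out[i] = a[i].mul_add(b[i], c[i]);
        }
    }
    assert_eq!(out, [10.5, 20.5, 30.5, 40.5]);
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```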
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_pd) @@ -163,7 +163,7 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_pd) @@ -175,7 +175,7 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmsubpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ps) @@ -187,7 +187,7 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ps) @@ -199,7 +199,7 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmsubps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and subtract the lower element in `c` from the intermediate /// result. Store the result in the lower element of the returned value, and /// copy the upper element from `a` to the upper elements of the result. @@ -213,7 +213,7 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and subtract the lower element in `c` from the intermediate /// result. Store the result in the lower element of the returned value, and /// copy the 3 upper elements from `a` to the upper elements of the result. @@ -227,7 +227,7 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubss(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// @@ -240,7 +240,7 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubaddpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. 
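"Alternatively add and subtract" follows a fixed lane pattern in Intel's pseudocode: even-indexed lanes subtract `c`, odd-indexed lanes add it, and the `fmsubadd` variants swap the two. A scalar sketch of that pattern, under that reading of the pseudocode:

```rust
fn fmaddsub4(a: [f32; 4], b: [f32; 4], c: [f32; 4]) -> [f32; 4] {
    let mut dst = [0.0f32; 4];
    for i in 0..4 {
        dst[i] = if i % 2 == 0 {
            a[i].mul_add(b[i], -c[i]) // even lane: a*b - c
        } else {
            a[i].mul_add(b[i], c[i])  // odd lane:  a*b + c
        };
    }
    dst
}

fn main() {
    let r = fmaddsub4([1.0; 4], [2.0; 4], [0.5; 4]);
    assert_eq!(r, [1.5, 2.5, 1.5, 2.5]);
}
```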
/// @@ -253,7 +253,7 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d vfmsubaddpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// @@ -266,7 +266,7 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubaddps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// @@ -279,7 +279,7 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmsubaddps256(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_pd) @@ -291,7 +291,7 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmaddpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_pd) @@ -303,7 +303,7 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfnmaddpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ps) @@ -315,7 +315,7 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmaddps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ps) @@ -327,7 +327,7 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfnmaddps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and add the negated intermediate result to the lower element /// in `c`. Store the result in the lower element of the returned value, and /// copy the upper element from `a` to the upper elements of the result. @@ -341,7 +341,7 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmaddsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and add the negated intermediate result to the lower element /// in `c`. 
Store the result in the lower element of the returned value, and /// copy the 3 upper elements from `a` to the upper elements of the result. @@ -355,7 +355,7 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmaddss(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -368,7 +368,7 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmsubpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -381,7 +381,7 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfnmsubpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -394,7 +394,7 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmsubps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -407,7 +407,7 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfnmsubps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and subtract packed elements in `c` from the negated /// intermediate result. Store the result in the lower element of the returned /// value, and copy the upper element from `a` to the upper elements of the @@ -422,7 +422,7 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmsubsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and subtract packed elements in `c` from the negated /// intermediate result. Store the result in the lower element of the /// returned value, and copy the 3 upper elements from `a` to the upper diff --git a/library/stdarch/crates/core_arch/src/x86/mmx.rs b/library/stdarch/crates/core_arch/src/x86/mmx.rs index 82f085cf95f..6308fcd36d7 100644 --- a/library/stdarch/crates/core_arch/src/x86/mmx.rs +++ b/library/stdarch/crates/core_arch/src/x86/mmx.rs @@ -25,7 +25,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 { mem::transmute(0_i64) } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] @@ -33,7 +33,7 @@ pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 { paddb(a, b) } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] @@ -41,7 +41,7 @@ pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 { _mm_add_pi8(a, b) } -/// Add packed 16-bit integers in `a` and `b`. 
+/// Adds packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] @@ -49,7 +49,7 @@ pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 { paddw(a, b) } -/// Add packed 16-bit integers in `a` and `b`. +/// Adds packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] @@ -57,7 +57,7 @@ pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 { _mm_add_pi16(a, b) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] @@ -65,7 +65,7 @@ pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 { paddd(a, b) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] @@ -73,7 +73,7 @@ pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 { _mm_add_pi32(a, b) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsb))] @@ -81,7 +81,7 @@ pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 { paddsb(a, b) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsb))] @@ -89,7 +89,7 @@ pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 { _mm_adds_pi8(a, b) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsw))] @@ -97,7 +97,7 @@ pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 { paddsw(a, b) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsw))] @@ -105,7 +105,7 @@ pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 { _mm_adds_pi16(a, b) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusb))] @@ -113,7 +113,7 @@ pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 { paddusb(a, b) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusb))] @@ -121,7 +121,7 @@ pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 { _mm_adds_pu8(a, b) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusw))] @@ -129,7 +129,7 @@ pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 { paddusw(a, b) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. 
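"Using saturation" means results are clamped to the element type's range instead of wrapping. Rust's integer types express the same idea directly, which makes for a compact scalar sketch (the MMX `__m64` intrinsics themselves are a poor target for new example code, so this stays scalar):

```rust
fn main() {
    // Signed 8-bit saturation, as in _mm_adds_pi8 / _m_paddsb.
    assert_eq!(100i8.saturating_add(100), 127);
    assert_eq!((-100i8).saturating_add(-100), -128);

    // Unsigned 8-bit saturation, as in _mm_adds_pu8 / _m_paddusb.
    assert_eq!(200u8.saturating_add(100), 255);

    // The plain packed adds (_mm_add_pi8 and friends) wrap instead.
    assert_eq!(200u8.wrapping_add(100), 44);
}
```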
#[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusw))] @@ -257,7 +257,7 @@ pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 { _mm_subs_pu16(a, b) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values @@ -269,7 +269,7 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { packsswb(a, b) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values @@ -362,49 +362,49 @@ pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 { punpckldq(a, b) } -/// Set packed 16-bit integers in dst with the supplied values. +/// Sets packed 16-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 { _mm_setr_pi16(e0, e1, e2, e3) } -/// Set packed 32-bit integers in dst with the supplied values. +/// Sets packed 32-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { _mm_setr_pi32(e0, e1) } -/// Set packed 8-bit integers in dst with the supplied values. +/// Sets packed 8-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 { _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) } -/// Broadcast 16-bit integer a to all all elements of dst. +/// Broadcasts 16-bit integer a to all all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 { _mm_setr_pi16(a, a, a, a) } -/// Broadcast 32-bit integer a to all all elements of dst. +/// Broadcasts 32-bit integer a to all all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 { _mm_setr_pi32(a, a) } -/// Broadcast 8-bit integer a to all all elements of dst. +/// Broadcasts 8-bit integer a to all all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { _mm_setr_pi8(a, a, a, a, a, a, a, a) } -/// Set packed 16-bit integers in dst with the supplied values in reverse +/// Sets packed 16-bit integers in dst with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "mmx")] @@ -412,7 +412,7 @@ pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { mem::transmute(i16x4::new(e0, e1, e2, e3)) } -/// Set packed 32-bit integers in dst with the supplied values in reverse +/// Sets packed 32-bit integers in dst with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "mmx")] @@ -420,7 +420,7 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { mem::transmute(i32x2::new(e0, e1)) } -/// Set packed 8-bit integers in dst with the supplied values in reverse order. +/// Sets packed 8-bit integers in dst with the supplied values in reverse order. 
#[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_setr_pi8( @@ -456,7 +456,7 @@ pub unsafe fn _m_empty() { emms() } -/// Copy 32-bit integer `a` to the lower elements of the return value, and zero +/// Copies 32-bit integer `a` to the lower elements of the return value, and zero /// the upper element of the return value. #[inline] #[target_feature(enable = "mmx")] diff --git a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs index c047b3b1cd6..26131e4e89e 100644 --- a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs +++ b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs @@ -16,7 +16,7 @@ extern "C" { fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i; } -/// Perform a carry-less multiplication of two 64-bit polynomials over the +/// Performs a carry-less multiplication of two 64-bit polynomials over the /// finite field GF(2^k). /// /// The immediate byte is used for determining which halves of `a` and `b` diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index 90bb9454b0a..49abc3d0497 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -20,7 +20,7 @@ extern "unadjusted" { use stdsimd_test::assert_instr; /// Read a hardware generated 16-bit random value and store the result in val. -/// Return 1 if a random value was generated, and 0 otherwise. +/// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step) #[inline] @@ -34,7 +34,7 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { } /// Read a hardware generated 32-bit random value and store the result in val. -/// Return 1 if a random value was generated, and 0 otherwise. +/// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/sha.rs b/library/stdarch/crates/core_arch/src/x86/sha.rs index 98bf4707f8c..3ff4e89d23e 100644 --- a/library/stdarch/crates/core_arch/src/x86/sha.rs +++ b/library/stdarch/crates/core_arch/src/x86/sha.rs @@ -23,7 +23,7 @@ extern "C" { #[cfg(test)] use stdsimd_test::assert_instr; -/// Perform an intermediate calculation for the next four SHA1 message values +/// Performs an intermediate calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and returning the result. /// @@ -36,7 +36,7 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) } -/// Perform the final calculation for the next four SHA1 message values +/// Performs the final calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using the intermediate result in `a` and the /// previous message values in `b`, and returns the result. 
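Because the RDRAND instruction can transiently fail, the status flag returned by the `_rdrand16_step`/`_rdrand32_step` helpers a little further up has to be checked and the call retried. A sketch of that pattern, assuming an x86_64 target and run-time detection of the `rdrand` feature:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::_rdrand32_step;

    if is_x86_feature_detected!("rdrand") {
        let mut val: u32 = 0;
        // The hardware may transiently report failure, so retry a few times.
        for _ in 0..10 {
            if unsafe { _rdrand32_step(&mut val) } == 1 {
                println!("hardware random value: {:#010x}", val);
                return;
            }
        }
        println!("rdrand gave no value after 10 attempts");
    } else {
        println!("rdrand not supported on this CPU");
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```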
/// @@ -62,7 +62,7 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) } -/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) +/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) /// from `a` and some pre-computed sum of the next 4 round message values /// (unsigned 32-bit integers), and state variable E from `b`, and return the /// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round @@ -86,7 +86,7 @@ pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i mem::transmute(ret) } -/// Perform an intermediate calculation for the next four SHA256 message values +/// Performs an intermediate calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. /// @@ -99,7 +99,7 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) } -/// Perform the final calculation for the next four SHA256 message values +/// Performs the final calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. /// @@ -112,7 +112,7 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) } -/// Perform 2 rounds of SHA256 operation using an initial SHA256 state +/// Performs 2 rounds of SHA256 operation using an initial SHA256 state /// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a /// pre-computed sum of the next 2 round message values (unsigned 32-bit /// integers) and the corresponding round constants from `k`, and store the diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 22bdc118c2b..96ea08697c0 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -102,7 +102,7 @@ pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { simd_div(a, b) } -/// Return the square root of the first single-precision (32-bit) +/// Returns the square root of the first single-precision (32-bit) /// floating-point element in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ss) @@ -114,7 +114,7 @@ pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 { sqrtss(a) } -/// Return the square root of packed single-precision (32-bit) floating-point +/// Returns the square root of packed single-precision (32-bit) floating-point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ps) @@ -126,7 +126,7 @@ pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 { sqrtps(a) } -/// Return the approximate reciprocal of the first single-precision +/// Returns the approximate reciprocal of the first single-precision /// (32-bit) floating-point element in `a`, the other elements are unchanged. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss) @@ -138,7 +138,7 @@ pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 { rcpss(a) } -/// Return the approximate reciprocal of packed single-precision (32-bit) +/// Returns the approximate reciprocal of packed single-precision (32-bit) /// floating-point elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps) @@ -150,7 +150,7 @@ pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 { rcpps(a) } -/// Return the approximate reciprocal square root of the fist single-precision +/// Returns the approximate reciprocal square root of the fist single-precision /// (32-bit) floating-point elements in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss) @@ -162,7 +162,7 @@ pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 { rsqrtss(a) } -/// Return the approximate reciprocal square root of packed single-precision +/// Returns the approximate reciprocal square root of packed single-precision /// (32-bit) floating-point elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ps) @@ -174,7 +174,7 @@ pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { rsqrtps(a) } -/// Compare the first single-precision (32-bit) floating-point element of `a` +/// Compares the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the minimum value in the first element of the return /// value, the other elements are copied from `a`. /// @@ -187,7 +187,7 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { minss(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` and +/// Compares packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ps) @@ -199,7 +199,7 @@ pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { minps(a, b) } -/// Compare the first single-precision (32-bit) floating-point element of `a` +/// Compares the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the maximum value in the first element of the return /// value, the other elements are copied from `a`. /// @@ -212,7 +212,7 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { maxss(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` and +/// Compares packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ps) @@ -298,7 +298,7 @@ pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { mem::transmute(simd_xor(a, b)) } -/// Compare the lowest `f32` of both inputs for equality. The lowest 32 bits of +/// Compares the lowest `f32` of both inputs for equality. The lowest 32 bits of /// the result will be `0xffffffff` if the two inputs are equal, or `0` /// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`. /// @@ -311,7 +311,7 @@ pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 0) } -/// Compare the lowest `f32` of both inputs for less than. 
The lowest 32 bits +/// Compares the lowest `f32` of both inputs for less than. The lowest 32 bits /// of the result will be `0xffffffff` if `a.extract(0)` is less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. @@ -325,7 +325,7 @@ pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 1) } -/// Compare the lowest `f32` of both inputs for less than or equal. The lowest +/// Compares the lowest `f32` of both inputs for less than or equal. The lowest /// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than /// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -339,7 +339,7 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 2) } -/// Compare the lowest `f32` of both inputs for greater than. The lowest 32 +/// Compares the lowest `f32` of both inputs for greater than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -353,7 +353,7 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3]) } -/// Compare the lowest `f32` of both inputs for greater than or equal. The +/// Compares the lowest `f32` of both inputs for greater than or equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is /// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. @@ -367,7 +367,7 @@ pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3]) } -/// Compare the lowest `f32` of both inputs for inequality. The lowest 32 bits +/// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits /// of the result will be `0xffffffff` if `a.extract(0)` is not equal to /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. @@ -381,7 +381,7 @@ pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 4) } -/// Compare the lowest `f32` of both inputs for not-less-than. The lowest 32 +/// Compares the lowest `f32` of both inputs for not-less-than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is not less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. @@ -395,7 +395,7 @@ pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 5) } -/// Compare the lowest `f32` of both inputs for not-less-than-or-equal. The +/// Compares the lowest `f32` of both inputs for not-less-than-or-equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. @@ -409,7 +409,7 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 6) } -/// Compare the lowest `f32` of both inputs for not-greater-than. The lowest 32 +/// Compares the lowest `f32` of both inputs for not-greater-than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is not greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are /// the upper 96 bits of `a`. 
@@ -423,7 +423,7 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3]) } -/// Compare the lowest `f32` of both inputs for not-greater-than-or-equal. The +/// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 /// bits of the result are the upper 96 bits of `a`. @@ -437,7 +437,7 @@ pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3]) } -/// Check if the lowest `f32` of both inputs are ordered. The lowest 32 bits of +/// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of /// the result will be `0xffffffff` if neither of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -451,7 +451,7 @@ pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 7) } -/// Check if the lowest `f32` of both inputs are unordered. The lowest 32 bits +/// Checks if the lowest `f32` of both inputs are unordered. The lowest 32 bits /// of the result will be `0xffffffff` if any of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -465,7 +465,7 @@ pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 3) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements /// were equal, or `0` otherwise. /// @@ -478,7 +478,7 @@ pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 0) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than the corresponding element in `b`, or `0` otherwise. /// @@ -491,7 +491,7 @@ pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 1) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than or equal to the corresponding element in `b`, or `0` /// otherwise. @@ -505,7 +505,7 @@ pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 2) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than the corresponding element in `b`, or `0` otherwise. /// @@ -518,7 +518,7 @@ pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 1) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than or equal to the corresponding element in `b`, or `0` /// otherwise. 
@@ -532,9 +532,9 @@ pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 2) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements -/// are *not* equal, or `0` otherwise. +/// are **not** equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ps) #[inline] @@ -545,9 +545,9 @@ pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 4) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* less than the corresponding element in `b`, or `0` +/// in `a` is **not** less than the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ps) @@ -559,9 +559,9 @@ pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 5) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* less than or equal to the corresponding element in `b`, or +/// in `a` is **not** less than or equal to the corresponding element in `b`, or /// `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ps) @@ -573,9 +573,9 @@ pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 6) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* greater than the corresponding element in `b`, or `0` +/// in `a` is **not** greater than the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ps) @@ -587,9 +587,9 @@ pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 5) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* greater than or equal to the corresponding element in `b`, +/// in `a` is **not** greater than or equal to the corresponding element in `b`, /// or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ps) @@ -601,7 +601,7 @@ pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 6) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// Returns four floats that have one of two possible bit patterns. The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise. 
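The packed comparisons above all produce per-lane masks of all-ones or all-zeros; a common next step is `_mm_movemask_ps`, which collapses those lanes into an ordinary bitmask. A sketch, assuming an x86_64 target; the `_mm_setr_ps` constructor and the chosen values are only for illustration:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm_setr_ps(1.0, 5.0, 3.0, f32::NAN);
            let b = _mm_setr_ps(2.0, 2.0, 3.0, 4.0);

            // Each lane is 0xffffffff where a[i] < b[i] and 0 otherwise;
            // comparisons involving NaN come out false.
            let lt = _mm_cmplt_ps(a, b);

            // Pack the sign bit of every lane into the low 4 bits.
            let mask = _mm_movemask_ps(lt);
            assert_eq!(mask, 0b0001); // only lane 0 satisfies a < b
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```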
@@ -615,7 +615,7 @@ pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 7) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// Returns four floats that have one of two possible bit patterns. The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are unordered (i.e., at least on of them is a NaN), or 0 otherwise. @@ -629,7 +629,7 @@ pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 3) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_ss) @@ -641,7 +641,7 @@ pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { comieq_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_ss) @@ -653,7 +653,7 @@ pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { comilt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. /// @@ -666,7 +666,7 @@ pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { comile_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. /// @@ -679,7 +679,7 @@ pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { comigt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. /// @@ -692,8 +692,8 @@ pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { comige_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns -/// `1` if they are *not* equal, or `0` otherwise. +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are **not** equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_ss) #[inline] @@ -704,7 +704,7 @@ pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { comineq_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. This instruction will not signal /// an exception if either argument is a quiet NaN. /// @@ -717,7 +717,7 @@ pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { ucomieq_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. 
Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// This instruction will not signal an exception if either argument is a quiet /// NaN. @@ -731,7 +731,7 @@ pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { ucomilt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. @@ -745,7 +745,7 @@ pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { ucomile_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. @@ -759,7 +759,7 @@ pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { ucomigt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. This instruction will not signal an exception if either /// argument is a quiet NaN. @@ -773,8 +773,8 @@ pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { ucomige_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns -/// `1` if they are *not* equal, or `0` otherwise. This instruction will not +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are **not** equal, or `0` otherwise. This instruction will not /// signal an exception if either argument is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_ss) @@ -786,7 +786,7 @@ pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { ucomineq_ss(a, b) } -/// Convert the lowest 32 bit float in the input vector to a 32 bit integer. +/// Converts the lowest 32 bit float in the input vector to a 32 bit integer. /// /// The result is rounded according to the current rounding mode. If the result /// cannot be represented as a 32 bit integer the result will be `0x8000_0000` @@ -815,7 +815,7 @@ pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { _mm_cvtss_si32(a) } -/// Convert the lowest 32 bit float in the input vector to a 32 bit integer +/// Converts the lowest 32 bit float in the input vector to a 32 bit integer /// with /// truncation. /// @@ -846,7 +846,7 @@ pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { _mm_cvttss_si32(a) } -/// Extract the lowest 32 bit float from the input vector. +/// Extracts the lowest 32 bit float from the input vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32) #[inline] @@ -858,7 +858,7 @@ pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { simd_extract(a, 0) } -/// Convert a 32 bit integer to a 32 bit float. The result vector is the input +/// Converts a 32 bit integer to a 32 bit float. The result vector is the input /// vector `a` with the lowest 32 bit float replaced by the converted integer. 
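The distinction drawn above between the rounding and the truncating conversion is easiest to see on a value like 2.7. A sketch, assuming an x86_64 target and that the MXCSR register is still in its default round-to-nearest state:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm_setr_ps(2.7, 0.0, 0.0, 0.0);

            assert_eq!(_mm_cvtss_f32(a), 2.7);  // extract the lowest f32 as-is
            assert_eq!(_mm_cvtss_si32(a), 3);   // rounds to nearest by default
            assert_eq!(_mm_cvttss_si32(a), 2);  // truncates toward zero
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```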
/// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit @@ -985,7 +985,7 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { ((z << 6) | (y << 4) | (x << 2) | w) as i32 } -/// Shuffle packed single-precision (32-bit) floating-point elements in `a` and +/// Shuffles packed single-precision (32-bit) floating-point elements in `a` and /// `b` using `mask`. /// /// The lower half of result takes values from `a` and the higher half from @@ -1043,7 +1043,7 @@ pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 { } } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the higher half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_ps) @@ -1055,7 +1055,7 @@ pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [2, 6, 3, 7]) } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the lower half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_ps) @@ -1092,7 +1092,7 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [0, 1, 4, 5]) } -/// Return a mask of the most significant bit of each element in `a`. +/// Returns a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 4 least significant bits of the return value. /// All other bits are set to `0`. @@ -1106,7 +1106,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) } -/// Set the upper two single-precision floating-point values with 64 bits of +/// Sets the upper two single-precision floating-point values with 64 bits of /// data loaded from the address `p`; the lower two values are passed through /// from `a`. #[inline] @@ -1135,7 +1135,7 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { simd_shuffle4(a, bb, [0, 1, 4, 5]) } -/// Load two floats from `p` into the lower half of a `__m128`. The upper half +/// Loads two floats from `p` into the lower half of a `__m128`. The upper half /// is copied from the upper half of `a`. #[inline] #[target_feature(enable = "sse")] @@ -1197,7 +1197,7 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { _mm_load1_ps(p) } -/// Load four `f32` values from *aligned* memory into a `__m128`. If the +/// Loads four `f32` values from *aligned* memory into a `__m128`. If the /// pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). /// @@ -1216,7 +1216,7 @@ pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { *(p as *const __m128) } -/// Load four `f32` values from memory into a `__m128`. There are no +/// Loads four `f32` values from memory into a `__m128`. There are no /// restrictions /// on memory alignment. For aligned memory /// [`_mm_load_ps`](fn._mm_load_ps.html) @@ -1241,7 +1241,7 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { dst } -/// Load four `f32` values from aligned memory into a `__m128` in reverse +/// Loads four `f32` values from aligned memory into a `__m128` in reverse /// order. 
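The aligned loads and stores in this file fault on a misaligned pointer, while the `u`-suffixed variants accept any address. A small round-trip sketch with the unaligned pair, assuming an x86_64 target:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse") {
        unsafe {
            use std::arch::x86_64::*;
            let src = [1.0f32, 2.0, 3.0, 4.0];
            let mut dst = [0.0f32; 4];

            let v = _mm_loadu_ps(src.as_ptr());  // no alignment requirement
            _mm_storeu_ps(dst.as_mut_ptr(), v);  // no alignment requirement
            assert_eq!(dst, src);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```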
/// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general @@ -1271,7 +1271,7 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 { simd_shuffle4(a, a, [3, 2, 1, 0]) } -/// Store the upper half of `a` (64 bits) into memory. +/// Stores the upper half of `a` (64 bits) into memory. /// /// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may /// choose to generate an equivalent sequence of other instructions. @@ -1305,7 +1305,7 @@ pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) { } } -/// Store the lower half of `a` (64 bits) into memory. +/// Stores the lower half of `a` (64 bits) into memory. /// /// This intrinsic corresponds to the `MOVQ` instruction. The compiler may /// choose to generate an equivalent sequence of other instructions. @@ -1337,7 +1337,7 @@ pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) { } } -/// Store the lowest 32 bit float of `a` into memory. +/// Stores the lowest 32 bit float of `a` into memory. /// /// This intrinsic corresponds to the `MOVSS` instruction. /// @@ -1350,7 +1350,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { *p = simd_extract(a, 0); } -/// Store the lowest 32 bit float of `a` repeated four times into *aligned* +/// Stores the lowest 32 bit float of `a` repeated four times into *aligned* /// memory. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general @@ -1389,7 +1389,7 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { _mm_store1_ps(p, a); } -/// Store four 32-bit floats into *aligned* memory. +/// Stores four 32-bit floats into *aligned* memory. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). @@ -1409,7 +1409,7 @@ pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = a; } -/// Store four 32-bit floats into memory. There are no restrictions on memory +/// Stores four 32-bit floats into memory. There are no restrictions on memory /// alignment. For aligned memory [`_mm_store_ps`](fn._mm_store_ps.html) may be /// faster. /// @@ -1428,7 +1428,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { ); } -/// Store four 32-bit floats into *aligned* memory in reverse order. +/// Stores four 32-bit floats into *aligned* memory in reverse order. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). @@ -1454,7 +1454,7 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = b; } -/// Return a `__m128` with the first component from `b` and the remaining +/// Returns a `__m128` with the first component from `b` and the remaining /// components from `a`. /// /// In other words for any `a` and `b`: @@ -1471,7 +1471,7 @@ pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [4, 1, 2, 3]) } -/// Perform a serializing operation on all store-to-memory instructions that +/// Performs a serializing operation on all store-to-memory instructions that /// were issued prior to this instruction. /// /// Guarantees that every store instruction that precedes, in program order, is @@ -1487,7 +1487,7 @@ pub unsafe fn _mm_sfence() { sfence() } -/// Get the unsigned 32-bit value of the MXCSR control and status register. +/// Gets the unsigned 32-bit value of the MXCSR control and status register. 
/// /// For more info see [`_mm_setcsr`](fn._mm_setcsr.html) /// @@ -1502,7 +1502,7 @@ pub unsafe fn _mm_getcsr() -> u32 { result as u32 } -/// Set the MXCSR register with the 32-bit unsigned integer value. +/// Sets the MXCSR register with the 32-bit unsigned integer value. /// /// This register constrols how SIMD instructions handle floating point /// operations. Modifying this register only affects the current thread. @@ -1878,7 +1878,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) { pref!(strategy) } -/// Return vector of type __m128 with undefined elements. +/// Returns vector of type __m128 with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps) #[inline] @@ -2040,7 +2040,7 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem_addr as *mut __m128, a); } -/// Store 64-bits of integer data from a into memory using a non-temporal +/// Stores 64-bits of integer data from a into memory using a non-temporal /// memory hint. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2437,7 +2437,7 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { constify_imm8!(imm8, call) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2446,7 +2446,7 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { cvttps2pi(a) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2455,7 +2455,7 @@ pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { _mm_cvttps_pi32(a) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2464,7 +2464,7 @@ pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { cvtps2pi(a) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2473,7 +2473,7 @@ pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { _mm_cvtps_pi32(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 16-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2485,7 +2485,7 @@ pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { _mm_packs_pi32(b, c) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 8-bit integers, and returns theem in the lower 4 elements of the /// result. 
#[inline] @@ -3784,7 +3784,7 @@ mod tests { let mut ofs = 0; let mut p = vals.as_mut_ptr(); - // Make sure p is *not* aligned to 16-byte boundary + // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; p = p.offset(1); diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index d8700e538ed..df438f6547c 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -10,7 +10,7 @@ use intrinsics; use mem; use ptr; -/// Provide a hint to the processor that the code sequence is a spin-wait loop. +/// Provides a hint to the processor that the code sequence is a spin-wait loop. /// /// This can help improve the performance and power consumption of spin-wait /// loops. @@ -24,7 +24,7 @@ pub unsafe fn _mm_pause() { pause() } -/// Invalidate and flush the cache line that contains `p` from all levels of +/// Invalidates and flushes the cache line that contains `p` from all levels of /// the cache hierarchy. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush) @@ -36,7 +36,7 @@ pub unsafe fn _mm_clflush(p: *mut u8) { clflush(p) } -/// Perform a serializing operation on all load-from-memory instructions +/// Performs a serializing operation on all load-from-memory instructions /// that were issued prior to this instruction. /// /// Guarantees that every load instruction that precedes, in program order, is @@ -52,7 +52,7 @@ pub unsafe fn _mm_lfence() { lfence() } -/// Perform a serializing operation on all load-from-memory and store-to-memory +/// Performs a serializing operation on all load-from-memory and store-to-memory /// instructions that were issued prior to this instruction. /// /// Guarantees that every memory access that precedes, in program order, the @@ -68,7 +68,7 @@ pub unsafe fn _mm_mfence() { mfence() } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8) #[inline] @@ -79,7 +79,7 @@ pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } -/// Add packed 16-bit integers in `a` and `b`. +/// Adds packed 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16) #[inline] @@ -90,7 +90,7 @@ pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32) #[inline] @@ -101,7 +101,7 @@ pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } -/// Add packed 64-bit integers in `a` and "b`. +/// Adds packed 64-bit integers in `a` and "b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64) #[inline] @@ -112,7 +112,7 @@ pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8) #[inline] @@ -123,7 +123,7 @@ pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsb(a.as_i8x16(), b.as_i8x16())) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16) #[inline] @@ -134,7 +134,7 @@ pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsw(a.as_i16x8(), b.as_i16x8())) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8) #[inline] @@ -145,7 +145,7 @@ pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsub(a.as_u8x16(), b.as_u8x16())) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16) #[inline] @@ -156,7 +156,7 @@ pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsuw(a.as_u16x8(), b.as_u16x8())) } -/// Average packed unsigned 8-bit integers in `a` and `b`. +/// Averages packed unsigned 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8) #[inline] @@ -167,7 +167,7 @@ pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pavgb(a.as_u8x16(), b.as_u8x16())) } -/// Average packed unsigned 16-bit integers in `a` and `b`. +/// Averages packed unsigned 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16) #[inline] @@ -178,9 +178,9 @@ pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pavgw(a.as_u16x8(), b.as_u16x8())) } -/// Multiply and then horizontally add signed 16 bit integers in `a` and `b`. +/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`. /// -/// Multiply packed signed 16-bit integers in `a` and `b`, producing +/// Multiplies packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of /// intermediate 32-bit integers. /// @@ -193,7 +193,7 @@ pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16) @@ -205,7 +205,7 @@ pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the /// packed maximum values. 
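// Illustrative sketch (not part of the patch): behaviour of the saturating-add
// and multiply-add intrinsics documented above. The helper name is made up;
// it assumes an x86_64 target, where SSE2 is part of the baseline feature set.
#[cfg(target_arch = "x86_64")]
fn adds_madd_demo() {
    use core::arch::x86_64::*;
    unsafe {
        // Unsigned saturation: 250 + 10 clamps to 255 instead of wrapping to 4.
        let a = _mm_set1_epi8(250u8 as i8);
        let b = _mm_set1_epi8(10);
        let sum = _mm_adds_epu8(a, b);
        let mut out = [0u8; 16];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, sum);
        assert!(out.iter().all(|&x| x == 255));

        // _mm_madd_epi16: each i32 lane is a[2i]*b[2i] + a[2i+1]*b[2i+1].
        let x = _mm_setr_epi16(1, 2, 3, 4, 1, 2, 3, 4);
        let y = _mm_setr_epi16(5, 6, 7, 8, 5, 6, 7, 8);
        let prod = _mm_madd_epi16(x, y);
        let mut acc = [0i32; 4];
        _mm_storeu_si128(acc.as_mut_ptr() as *mut __m128i, prod);
        assert_eq!(acc, [17, 53, 17, 53]); // 1*5+2*6, 3*7+4*8, ...
    }
}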
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8) @@ -217,7 +217,7 @@ pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16) @@ -229,7 +229,7 @@ pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsw(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the /// packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8) @@ -241,7 +241,7 @@ pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminub(a.as_u8x16(), b.as_u8x16())) } -/// Multiply the packed 16-bit integers in `a` and `b`. +/// Multiplies the packed 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. @@ -255,7 +255,7 @@ pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) } -/// Multiply the packed unsigned 16-bit integers in `a` and `b`. +/// Multiplies the packed unsigned 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. @@ -269,7 +269,7 @@ pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) } -/// Multiply the packed 16-bit integers in `a` and `b`. +/// Multiplies the packed 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// low 16 bits of the intermediate integers. @@ -283,10 +283,10 @@ pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } -/// Multiply the low unsigned 32-bit integers from each packed 64-bit element +/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element /// in `a` and `b`. /// -/// Return the unsigned 64-bit results. +/// Returns the unsigned 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32) #[inline] @@ -299,7 +299,7 @@ pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { /// Sum the absolute differences of packed unsigned 8-bit integers. /// -/// Compute the absolute differences of packed unsigned 8-bit integers in `a` +/// Computes the absolute differences of packed unsigned 8-bit integers in `a` /// and `b`, then horizontally sum each consecutive 8 differences to produce /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in /// the low 16 bits of 64-bit elements returned. @@ -313,7 +313,7 @@ pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } -/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. +/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8) #[inline] @@ -324,7 +324,7 @@ pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } -/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. +/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16) #[inline] @@ -405,7 +405,7 @@ pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psubusw(a.as_u16x8(), b.as_u16x8())) } -/// Shift `a` left by `imm8` bytes while shifting in zeros. +/// Shifts `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128) #[inline] @@ -472,7 +472,7 @@ unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shift `a` left by `imm8` bytes while shifting in zeros. +/// Shifts `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128) #[inline] @@ -484,7 +484,7 @@ pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128_impl(a, imm8) } -/// Shift `a` right by `imm8` bytes while shifting in zeros. +/// Shifts `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128) #[inline] @@ -496,7 +496,7 @@ pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128_impl(a, imm8) } -/// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros. +/// Shifts packed 16-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16) #[inline] @@ -508,7 +508,7 @@ pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(pslliw(a.as_i16x8(), imm8)) } -/// Shift packed 16-bit integers in `a` left by `count` while shifting in +/// Shifts packed 16-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16) @@ -520,7 +520,7 @@ pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllw(a.as_i16x8(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros. +/// Shifts packed 32-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32) #[inline] @@ -532,7 +532,7 @@ pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psllid(a.as_i32x4(), imm8)) } -/// Shift packed 32-bit integers in `a` left by `count` while shifting in +/// Shifts packed 32-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32) @@ -544,7 +544,7 @@ pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(pslld(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros. 
+/// Shifts packed 64-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64) #[inline] @@ -556,7 +556,7 @@ pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i { mem::transmute(pslliq(a.as_i64x2(), imm8)) } -/// Shift packed 64-bit integers in `a` left by `count` while shifting in +/// Shifts packed 64-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64) @@ -568,7 +568,7 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllq(a.as_i64x2(), count.as_i64x2())) } -/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign +/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16) @@ -581,7 +581,7 @@ pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psraiw(a.as_i16x8(), imm8)) } -/// Shift packed 16-bit integers in `a` right by `count` while shifting in sign +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16) @@ -593,7 +593,7 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psraw(a.as_i16x8(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign +/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32) @@ -606,7 +606,7 @@ pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psraid(a.as_i32x4(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `count` while shifting in sign +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32) @@ -618,7 +618,7 @@ pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrad(a.as_i32x4(), count.as_i32x4())) } -/// Shift `a` right by `imm8` bytes while shifting in zeros. +/// Shifts `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128) #[inline] @@ -685,7 +685,7 @@ unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16) @@ -698,7 +698,7 @@ pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrliw(a.as_i16x8(), imm8)) } -/// Shift packed 16-bit integers in `a` right by `count` while shifting in +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in /// zeros. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16) @@ -710,7 +710,7 @@ pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32) @@ -723,7 +723,7 @@ pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrlid(a.as_i32x4(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `count` while shifting in +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32) @@ -735,7 +735,7 @@ pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrld(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64) @@ -748,7 +748,7 @@ pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrliq(a.as_i64x2(), imm8)) } -/// Shift packed 64-bit integers in `a` right by `count` while shifting in +/// Shifts packed 64-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64) @@ -760,7 +760,7 @@ pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } -/// Compute the bitwise AND of 128 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128) @@ -772,7 +772,7 @@ pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(a, b) } -/// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and +/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and /// then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128) @@ -784,7 +784,7 @@ pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } -/// Compute the bitwise OR of 128 bits (representing integer data) in `a` and +/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128) @@ -796,7 +796,7 @@ pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { simd_or(a, b) } -/// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and +/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and /// `b`. 
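// Illustrative sketch (not part of the patch): the bitwise and shift-by-vector
// intrinsics documented above. `_mm_andnot_si128(a, b)` computes `!a & b`
// (note the operand order), and `_mm_sll_epi16` takes its shift count from the
// low 64 bits of a vector, e.g. one built with `_mm_cvtsi32_si128`. The helper
// name is made up; it assumes an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn bitwise_shift_demo() {
    use core::arch::x86_64::*;
    unsafe {
        let a = _mm_set1_epi16(0b1100);
        let b = _mm_set1_epi16(0b1010);
        let mut out = [0i16; 8];

        // !0b1100 & 0b1010 == 0b0010 in every lane.
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_andnot_si128(a, b));
        assert!(out.iter().all(|&x| x == 0b0010));

        // Shift every 16-bit lane left by 3: 0b1100 -> 0b110_0000.
        let count = _mm_cvtsi32_si128(3);
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_sll_epi16(a, count));
        assert!(out.iter().all(|&x| x == 0b110_0000));
    }
}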
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128) @@ -808,7 +808,7 @@ pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { simd_xor(a, b) } -/// Compare packed 8-bit integers in `a` and `b` for equality. +/// Compares packed 8-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8) #[inline] @@ -819,7 +819,7 @@ pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed 16-bit integers in `a` and `b` for equality. +/// Compares packed 16-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16) #[inline] @@ -830,7 +830,7 @@ pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 32-bit integers in `a` and `b` for equality. +/// Compares packed 32-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32) #[inline] @@ -841,7 +841,7 @@ pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed 8-bit integers in `a` and `b` for greater-than. +/// Compares packed 8-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8) #[inline] @@ -852,7 +852,7 @@ pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed 16-bit integers in `a` and `b` for greater-than. +/// Compares packed 16-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16) #[inline] @@ -863,7 +863,7 @@ pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 32-bit integers in `a` and `b` for greater-than. +/// Compares packed 32-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32) #[inline] @@ -874,7 +874,7 @@ pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed 8-bit integers in `a` and `b` for less-than. +/// Compares packed 8-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8) #[inline] @@ -885,7 +885,7 @@ pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed 16-bit integers in `a` and `b` for less-than. +/// Compares packed 16-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16) #[inline] @@ -896,7 +896,7 @@ pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 32-bit integers in `a` and `b` for less-than. 
+/// Compares packed 32-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32) #[inline] @@ -907,7 +907,7 @@ pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i32x4(), b.as_i32x4())) } -/// Convert the lower two packed 32-bit integers in `a` to packed +/// Converts the lower two packed 32-bit integers in `a` to packed /// double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd) @@ -920,7 +920,7 @@ pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { simd_cast::(simd_shuffle2(a, a, [0, 1])) } -/// Return `a` with its lower element replaced by `b` after converting it to +/// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd) @@ -932,7 +932,7 @@ pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { simd_insert(a, 0, b as f64) } -/// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) +/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps) @@ -944,7 +944,7 @@ pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { cvtdq2ps(a.as_i32x4()) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32) @@ -956,7 +956,7 @@ pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { mem::transmute(cvtps2dq(a)) } -/// Return a vector whose lowest element is `a` and all higher elements are +/// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128) @@ -968,7 +968,7 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { mem::transmute(i32x4::new(a, 0, 0, 0)) } -/// Return the lowest element of `a`. +/// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32) #[inline] @@ -979,7 +979,7 @@ pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { simd_extract(a.as_i32x4(), 0) } -/// Set packed 64-bit integers with the supplied values, from highest to +/// Sets packed 64-bit integers with the supplied values, from highest to /// lowest. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x) @@ -991,7 +991,7 @@ pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { mem::transmute(i64x2::new(e0, e1)) } -/// Set packed 32-bit integers with the supplied values. +/// Sets packed 32-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32) #[inline] @@ -1002,7 +1002,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { mem::transmute(i32x4::new(e0, e1, e2, e3)) } -/// Set packed 16-bit integers with the supplied values. 
+/// Sets packed 16-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16) #[inline] @@ -1022,7 +1022,7 @@ pub unsafe fn _mm_set_epi16( mem::transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } -/// Set packed 8-bit integers with the supplied values. +/// Sets packed 8-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8) #[inline] @@ -1053,7 +1053,7 @@ pub unsafe fn _mm_set_epi8( )) } -/// Broadcast 64-bit integer `a` to all elements. +/// Broadcasts 64-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x) #[inline] @@ -1064,7 +1064,7 @@ pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { _mm_set_epi64x(a, a) } -/// Broadcast 32-bit integer `a` to all elements. +/// Broadcasts 32-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32) #[inline] @@ -1075,7 +1075,7 @@ pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { _mm_set_epi32(a, a, a, a) } -/// Broadcast 16-bit integer `a` to all elements. +/// Broadcasts 16-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16) #[inline] @@ -1086,7 +1086,7 @@ pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { _mm_set_epi16(a, a, a, a, a, a, a, a) } -/// Broadcast 8-bit integer `a` to all elements. +/// Broadcasts 8-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8) #[inline] @@ -1097,7 +1097,7 @@ pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } -/// Set packed 32-bit integers with the supplied values in reverse order. +/// Sets packed 32-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32) #[inline] @@ -1108,7 +1108,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { _mm_set_epi32(e0, e1, e2, e3) } -/// Set packed 16-bit integers with the supplied values in reverse order. +/// Sets packed 16-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16) #[inline] @@ -1128,7 +1128,7 @@ pub unsafe fn _mm_setr_epi16( _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) } -/// Set packed 8-bit integers with the supplied values in reverse order. +/// Sets packed 8-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8) #[inline] @@ -1170,7 +1170,7 @@ pub unsafe fn _mm_setzero_si128() -> __m128i { _mm_set1_epi64x(0) } -/// Load 64-bit integer from memory into first element of returned vector. +/// Loads 64-bit integer from memory into first element of returned vector. 
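// Illustrative sketch (not part of the patch): argument order for the set
// intrinsics documented above. `_mm_set_epi32` takes arguments from the
// highest lane down to the lowest, while `_mm_setr_epi32` takes them in
// lowest-lane-first (memory) order. The helper name is made up; it assumes
// an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn set_order_demo() {
    use core::arch::x86_64::*;
    unsafe {
        let hi_first = _mm_set_epi32(3, 2, 1, 0);  // lane 0 == 0, lane 3 == 3
        let lo_first = _mm_setr_epi32(0, 1, 2, 3); // same vector
        let mut a = [0i32; 4];
        let mut b = [0i32; 4];
        _mm_storeu_si128(a.as_mut_ptr() as *mut __m128i, hi_first);
        _mm_storeu_si128(b.as_mut_ptr() as *mut __m128i, lo_first);
        assert_eq!(a, [0, 1, 2, 3]);
        assert_eq!(a, b);
    }
}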
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64) #[inline] @@ -1190,7 +1190,7 @@ pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64)) } -/// Load 128-bits of integer data from memory into a new vector. +/// Loads 128-bits of integer data from memory into a new vector. /// /// `mem_addr` must be aligned on a 16-byte boundary. /// @@ -1203,7 +1203,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { *mem_addr } -/// Load 128-bits of integer data from memory into a new vector. +/// Loads 128-bits of integer data from memory into a new vector. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1240,7 +1240,7 @@ pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) } -/// Store 128-bits of integer data from `a` into memory. +/// Stores 128-bits of integer data from `a` into memory. /// /// `mem_addr` must be aligned on a 16-byte boundary. /// @@ -1253,7 +1253,7 @@ pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { *mem_addr = a; } -/// Store 128-bits of integer data from `a` into memory. +/// Stores 128-bits of integer data from `a` into memory. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1266,7 +1266,7 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { storeudq(mem_addr as *mut i8, a); } -/// Store the lower 64-bit integer `a` to a memory location. +/// Stores the lower 64-bit integer `a` to a memory location. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1314,7 +1314,7 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { ::intrinsics::nontemporal_store(mem_addr, a); } -/// Return a vector where the low element is extracted from `a` and its upper +/// Returns a vector where the low element is extracted from `a` and its upper /// element is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64) @@ -1329,7 +1329,7 @@ pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { mem::transmute(r) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16) @@ -1341,7 +1341,7 @@ pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32) @@ -1353,7 +1353,7 @@ pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16) @@ -1365,7 +1365,7 @@ pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } -/// Return the `imm8` element of `a`. +/// Returns the `imm8` element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16) #[inline] @@ -1377,7 +1377,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 } -/// Return a new vector where the `imm8` element of `a` is replaced with `i`. +/// Returns a new vector where the `imm8` element of `a` is replaced with `i`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16) #[inline] @@ -1389,7 +1389,7 @@ pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16)) } -/// Return a mask of the most significant bit of each element in `a`. +/// Returns a mask of the most significant bit of each element in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8) #[inline] @@ -1400,7 +1400,7 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { pmovmskb(a.as_i8x16()) } -/// Shuffle 32-bit integers in `a` using the control in `imm8`. +/// Shuffles 32-bit integers in `a` using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32) #[inline] @@ -1463,7 +1463,7 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shuffle 16-bit integers in the high 64 bits of `a` using the control in +/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in /// `imm8`. /// /// Put the results in the high 64 bits of the returned vector, with the low 64 @@ -1523,7 +1523,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shuffle 16-bit integers in the low 64 bits of `a` using the control in +/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in /// `imm8`. /// /// Put the results in the low 64 bits of the returned vector, with the high 64 @@ -1584,7 +1584,7 @@ pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Unpack and interleave 8-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 8-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8) #[inline] @@ -1599,7 +1599,7 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { )) } -/// Unpack and interleave 16-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 16-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16) #[inline] @@ -1611,7 +1611,7 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(x) } -/// Unpack and interleave 32-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 32-bit integers from the high half of `a` and `b`. 
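// Illustrative sketch (not part of the patch): a common use of the comparison
// and `_mm_movemask_epi8` intrinsics documented above -- finding the first
// occurrence of a byte in a 16-byte chunk. The helper name is made up; it
// assumes an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn find_byte(chunk: &[u8; 16], needle: u8) -> Option<usize> {
    use core::arch::x86_64::*;
    unsafe {
        let data = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
        // Each equal byte becomes 0xFF; movemask packs the 16 sign bits into
        // the low bits of an i32.
        let eq = _mm_cmpeq_epi8(data, _mm_set1_epi8(needle as i8));
        let mask = _mm_movemask_epi8(eq) as u32;
        if mask == 0 { None } else { Some(mask.trailing_zeros() as usize) }
    }
}
// e.g. find_byte(b"abcdefghijklmnop", b'h') == Some(7)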
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32) #[inline] @@ -1622,7 +1622,7 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) } -/// Unpack and interleave 64-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 64-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64) #[inline] @@ -1633,7 +1633,7 @@ pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3])) } -/// Unpack and interleave 8-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 8-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8) #[inline] @@ -1648,7 +1648,7 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { )) } -/// Unpack and interleave 16-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 16-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16) #[inline] @@ -1660,7 +1660,7 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(x) } -/// Unpack and interleave 32-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 32-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32) #[inline] @@ -1671,7 +1671,7 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) } -/// Unpack and interleave 64-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 64-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64) #[inline] @@ -1682,7 +1682,7 @@ pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2])) } -/// Return a new vector with the low element of `a` replaced by the sum of the +/// Returns a new vector with the low element of `a` replaced by the sum of the /// low elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd) @@ -1694,7 +1694,7 @@ pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } -/// Add packed double-precision (64-bit) floating-point elements in `a` and +/// Adds packed double-precision (64-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd) @@ -1706,7 +1706,7 @@ pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { simd_add(a, b) } -/// Return a new vector with the low element of `a` replaced by the result of +/// Returns a new vector with the low element of `a` replaced by the result of /// diving the lower element of `a` by the lower element of `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd) @@ -1730,7 +1730,7 @@ pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { simd_div(a, b) } -/// Return a new vector with the low element of `a` replaced by the maximum +/// Returns a new vector with the low element of `a` replaced by the maximum /// of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd) @@ -1742,7 +1742,7 @@ pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { maxsd(a, b) } -/// Return a new vector with the maximum values from corresponding elements in +/// Returns a new vector with the maximum values from corresponding elements in /// `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd) @@ -1754,7 +1754,7 @@ pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { maxpd(a, b) } -/// Return a new vector with the low element of `a` replaced by the minimum +/// Returns a new vector with the low element of `a` replaced by the minimum /// of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd) @@ -1766,7 +1766,7 @@ pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { minsd(a, b) } -/// Return a new vector with the minimum values from corresponding elements in +/// Returns a new vector with the minimum values from corresponding elements in /// `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd) @@ -1778,7 +1778,7 @@ pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { minpd(a, b) } -/// Return a new vector with the low element of `a` replaced by multiplying the +/// Returns a new vector with the low element of `a` replaced by multiplying the /// low elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd) @@ -1790,7 +1790,7 @@ pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd) @@ -1802,7 +1802,7 @@ pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { simd_mul(a, b) } -/// Return a new vector with the low element of `a` replaced by the square +/// Returns a new vector with the low element of `a` replaced by the square /// root of the lower element `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd) @@ -1814,7 +1814,7 @@ pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) } -/// Return a new vector with the square root of each of the values in `a`. +/// Returns a new vector with the square root of each of the values in `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd) #[inline] @@ -1825,7 +1825,7 @@ pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { sqrtpd(a) } -/// Return a new vector with the low element of `a` replaced by subtracting the +/// Returns a new vector with the low element of `a` replaced by subtracting the /// low element by `b` from the low element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd) @@ -1849,7 +1849,7 @@ pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { simd_sub(a, b) } -/// Compute the bitwise AND of packed double-precision (64-bit) floating-point +/// Computes the bitwise AND of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd) @@ -1863,7 +1863,7 @@ pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_and_si128(a, b)) } -/// Compute the bitwise NOT of `a` and then AND with `b`. +/// Computes the bitwise NOT of `a` and then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd) #[inline] @@ -1876,7 +1876,7 @@ pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_andnot_si128(a, b)) } -/// Compute the bitwise OR of `a` and `b`. +/// Computes the bitwise OR of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd) #[inline] @@ -1889,7 +1889,7 @@ pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_or_si128(a, b)) } -/// Compute the bitwise OR of `a` and `b`. +/// Computes the bitwise OR of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd) #[inline] @@ -1902,7 +1902,7 @@ pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_xor_si128(a, b)) } -/// Return a new vector with the low element of `a` replaced by the equality +/// Returns a new vector with the low element of `a` replaced by the equality /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd) @@ -1914,7 +1914,7 @@ pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 0) } -/// Return a new vector with the low element of `a` replaced by the less-than +/// Returns a new vector with the low element of `a` replaced by the less-than /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd) @@ -1926,7 +1926,7 @@ pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 1) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// less-than-or-equal comparison of the lower elements of `a` and `b`. 
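// Illustrative sketch (not part of the patch): the `_sd` forms documented
// above operate only on the low f64 lane and pass the upper lane of `a`
// through unchanged, unlike the `_pd` forms, which work on both lanes. The
// helper name is made up; it assumes an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn scalar_vs_packed_demo() {
    use core::arch::x86_64::*;
    unsafe {
        let a = _mm_set_pd(10.0, 4.0); // lanes (low, high): [4.0, 10.0]
        let b = _mm_set_pd(99.0, 9.0);
        let s = _mm_sqrt_sd(a, b);     // low = sqrt(b.low) = 3.0, high = a.high = 10.0
        assert_eq!(_mm_cvtsd_f64(s), 3.0);
        let p = _mm_add_pd(a, b);      // both lanes: [13.0, 109.0]
        assert_eq!(_mm_cvtsd_f64(p), 13.0);
    }
}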
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd) @@ -1938,7 +1938,7 @@ pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 2) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// greater-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd) @@ -1950,7 +1950,7 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// greater-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd) @@ -1962,7 +1962,7 @@ pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Return a new vector with the low element of `a` replaced by the result +/// Returns a new vector with the low element of `a` replaced by the result /// of comparing both of the lower elements of `a` and `b` to `NaN`. If /// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` /// otherwise. @@ -1976,7 +1976,7 @@ pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 7) } -/// Return a new vector with the low element of `a` replaced by the result of +/// Returns a new vector with the low element of `a` replaced by the result of /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. /// @@ -1989,7 +1989,7 @@ pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 3) } -/// Return a new vector with the low element of `a` replaced by the not-equal +/// Returns a new vector with the low element of `a` replaced by the not-equal /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) @@ -2001,7 +2001,7 @@ pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 4) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-less-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) @@ -2013,7 +2013,7 @@ pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 5) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) @@ -2025,7 +2025,7 @@ pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 6) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-greater-than comparison of the lower elements of `a` and `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) @@ -2037,7 +2037,7 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) @@ -2049,7 +2049,7 @@ pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Compare corresponding elements in `a` and `b` for equality. +/// Compares corresponding elements in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd) #[inline] @@ -2060,7 +2060,7 @@ pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 0) } -/// Compare corresponding elements in `a` and `b` for less-than. +/// Compares corresponding elements in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd) #[inline] @@ -2071,7 +2071,7 @@ pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 1) } -/// Compare corresponding elements in `a` and `b` for less-than-or-equal +/// Compares corresponding elements in `a` and `b` for less-than-or-equal /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd) #[inline] @@ -2082,7 +2082,7 @@ pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 2) } -/// Compare corresponding elements in `a` and `b` for greater-than. +/// Compares corresponding elements in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd) #[inline] @@ -2093,7 +2093,7 @@ pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmplt_pd(b, a) } -/// Compare corresponding elements in `a` and `b` for greater-than-or-equal. +/// Compares corresponding elements in `a` and `b` for greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd) #[inline] @@ -2104,7 +2104,7 @@ pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmple_pd(b, a) } -/// Compare corresponding elements in `a` and `b` to see if neither is `NaN`. +/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd) #[inline] @@ -2115,7 +2115,7 @@ pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 7) } -/// Compare corresponding elements in `a` and `b` to see if either is `NaN`. +/// Compares corresponding elements in `a` and `b` to see if either is `NaN`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd) #[inline] @@ -2126,7 +2126,7 @@ pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 3) } -/// Compare corresponding elements in `a` and `b` for not-equal. +/// Compares corresponding elements in `a` and `b` for not-equal. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd) #[inline] @@ -2137,7 +2137,7 @@ pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 4) } -/// Compare corresponding elements in `a` and `b` for not-less-than. +/// Compares corresponding elements in `a` and `b` for not-less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd) #[inline] @@ -2148,7 +2148,7 @@ pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 5) } -/// Compare corresponding elements in `a` and `b` for not-less-than-or-equal. +/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd) #[inline] @@ -2159,7 +2159,7 @@ pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 6) } -/// Compare corresponding elements in `a` and `b` for not-greater-than. +/// Compares corresponding elements in `a` and `b` for not-greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd) #[inline] @@ -2170,7 +2170,7 @@ pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnlt_pd(b, a) } -/// Compare corresponding elements in `a` and `b` for +/// Compares corresponding elements in `a` and `b` for /// not-greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd) @@ -2182,7 +2182,7 @@ pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnle_pd(b, a) } -/// Compare the lower element of `a` and `b` for equality. +/// Compares the lower element of `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd) #[inline] @@ -2193,7 +2193,7 @@ pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { comieqsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than. +/// Compares the lower element of `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd) #[inline] @@ -2204,7 +2204,7 @@ pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { comiltsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than-or-equal. +/// Compares the lower element of `a` and `b` for less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd) #[inline] @@ -2215,7 +2215,7 @@ pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { comilesd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than. +/// Compares the lower element of `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd) #[inline] @@ -2226,7 +2226,7 @@ pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { comigtsd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than-or-equal. +/// Compares the lower element of `a` and `b` for greater-than-or-equal. 
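Again not from the patch: a small sketch of the scalar `_mm_comi*_sd` comparisons above, which inspect only the low lane and return `0` or `1` instead of a mask (`scalar_compare_demo` is a made-up name).

```
#[cfg(target_arch = "x86_64")]
fn scalar_compare_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let a = _mm_set_sd(1.5); // low lane = 1.5, high lane = 0.0
        let b = _mm_set_sd(2.5);
        assert_eq!(_mm_comilt_sd(a, b), 1); // 1.5 < 2.5
        assert_eq!(_mm_comigt_sd(a, b), 0);
    }
}
```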
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd) #[inline] @@ -2237,7 +2237,7 @@ pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { comigesd(a, b) } -/// Compare the lower element of `a` and `b` for not-equal. +/// Compares the lower element of `a` and `b` for not-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd) #[inline] @@ -2248,7 +2248,7 @@ pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { comineqsd(a, b) } -/// Compare the lower element of `a` and `b` for equality. +/// Compares the lower element of `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd) #[inline] @@ -2259,7 +2259,7 @@ pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { ucomieqsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than. +/// Compares the lower element of `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd) #[inline] @@ -2270,7 +2270,7 @@ pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { ucomiltsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than-or-equal. +/// Compares the lower element of `a` and `b` for less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd) #[inline] @@ -2281,7 +2281,7 @@ pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { ucomilesd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than. +/// Compares the lower element of `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd) #[inline] @@ -2292,7 +2292,7 @@ pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { ucomigtsd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than-or-equal. +/// Compares the lower element of `a` and `b` for greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd) #[inline] @@ -2303,7 +2303,7 @@ pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { ucomigesd(a, b) } -/// Compare the lower element of `a` and `b` for not-equal. +/// Compares the lower element of `a` and `b` for not-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd) #[inline] @@ -2314,7 +2314,7 @@ pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { ucomineqsd(a, b) } -/// Convert packed double-precision (64-bit) floating-point elements in "a" to +/// Converts packed double-precision (64-bit) floating-point elements in "a" to /// packed single-precision (32-bit) floating-point elements /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps) @@ -2326,7 +2326,7 @@ pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { cvtpd2ps(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed /// double-precision (64-bit) floating-point elements. 
/// @@ -2339,7 +2339,7 @@ pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { cvtps2pd(a) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` to +/// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32) @@ -2351,7 +2351,7 @@ pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { mem::transmute(cvtpd2dq(a)) } -/// Convert the lower double-precision (64-bit) floating-point element in a to +/// Converts the lower double-precision (64-bit) floating-point element in a to /// a 32-bit integer. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32) @@ -2363,9 +2363,9 @@ pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { cvtsd2si(a) } -/// Convert the lower double-precision (64-bit) floating-point element in `b` +/// Converts the lower double-precision (64-bit) floating-point element in `b` /// to a single-precision (32-bit) floating-point element, store the result in -/// the lower element of the return value, and copy the upper element from `a` +/// the lower element of the return value, and copies the upper element from `a` /// to the upper element the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss) @@ -2377,7 +2377,7 @@ pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { cvtsd2ss(a, b) } -/// Return the lower double-precision (64-bit) floating-point element of "a". +/// Returns the lower double-precision (64-bit) floating-point element of "a". /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64) #[inline] @@ -2387,9 +2387,9 @@ pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { simd_extract(a, 0) } -/// Convert the lower single-precision (32-bit) floating-point element in `b` +/// Converts the lower single-precision (32-bit) floating-point element in `b` /// to a double-precision (64-bit) floating-point element, store the result in -/// the lower element of the return value, and copy the upper element from `a` +/// the lower element of the return value, and copies the upper element from `a` /// to the upper element the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd) @@ -2401,7 +2401,7 @@ pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { cvtss2sd(a, b) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` to +/// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32) @@ -2413,7 +2413,7 @@ pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { mem::transmute(cvttpd2dq(a)) } -/// Convert the lower double-precision (64-bit) floating-point element in `a` +/// Converts the lower double-precision (64-bit) floating-point element in `a` /// to a 32-bit integer with truncation. 
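A hedged sketch (not in the diff) contrasting the rounding and the truncating double-to-integer conversions described above; `convert_demo` is an illustrative name.

```
#[cfg(target_arch = "x86_64")]
fn convert_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let v = _mm_set_sd(2.9);
        // `_mm_cvtsd_si32` rounds using the current rounding mode
        // (round-to-nearest by default); `_mm_cvttsd_si32` always truncates.
        assert_eq!(_mm_cvtsd_si32(v), 3);
        assert_eq!(_mm_cvttsd_si32(v), 2);
    }
}
```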
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32) @@ -2425,7 +2425,7 @@ pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { cvttsd2si(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32) @@ -2437,7 +2437,7 @@ pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { mem::transmute(cvttps2dq(a)) } -/// Copy double-precision (64-bit) floating-point element `a` to the lower +/// Copies double-precision (64-bit) floating-point element `a` to the lower /// element of the packed 64-bit return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd) @@ -2448,7 +2448,7 @@ pub unsafe fn _mm_set_sd(a: f64) -> __m128d { _mm_set_pd(0.0, a) } -/// Broadcast double-precision (64-bit) floating-point value a to all elements +/// Broadcasts double-precision (64-bit) floating-point value a to all elements /// of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd) @@ -2459,7 +2459,7 @@ pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { _mm_set_pd(a, a) } -/// Broadcast double-precision (64-bit) floating-point value a to all elements +/// Broadcasts double-precision (64-bit) floating-point value a to all elements /// of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1) @@ -2470,7 +2470,7 @@ pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { _mm_set_pd(a, a) } -/// Set packed double-precision (64-bit) floating-point elements in the return +/// Sets packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd) @@ -2481,7 +2481,7 @@ pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { __m128d(b, a) } -/// Set packed double-precision (64-bit) floating-point elements in the return +/// Sets packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd) @@ -2504,7 +2504,7 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { _mm_set_pd(0.0, 0.0) } -/// Return a mask of the most significant bit of each element in `a`. +/// Returns a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 2 least significant bits of the return value. /// All other bits are set to `0`. @@ -2518,7 +2518,7 @@ pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { movmskpd(a) } -/// Load 128-bits (composed of 2 packed double-precision (64-bit) +/// Loads 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. 
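Not part of the patch: a sketch of the argument order implied by the constructors above, where `_mm_set_pd` takes its arguments high-lane first and `_mm_setr_pd` takes them in memory order.

```
#[cfg(target_arch = "x86_64")]
fn constructor_order_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let a = _mm_set_pd(2.0, 1.0);  // lane 0 = 1.0, lane 1 = 2.0
        let b = _mm_setr_pd(1.0, 2.0); // same layout, reversed argument order
        assert_eq!(_mm_movemask_pd(_mm_cmpeq_pd(a, b)), 0b11); // both lanes equal
        // The low lane of a broadcast is the broadcast value itself.
        assert_eq!(_mm_cvtsd_f64(_mm_set1_pd(7.0)), 7.0);
    }
}
```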
@@ -2598,7 +2598,7 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0) } -/// Store 128-bits (composed of 2 packed double-precision (64-bit) +/// Stores 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. `mem_addr` must be aligned /// on a 16-byte boundary or a general-protection exception may be generated. /// @@ -2612,7 +2612,7 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = a; } -/// Store 128-bits (composed of 2 packed double-precision (64-bit) +/// Stores 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -2625,7 +2625,7 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { storeupd(mem_addr as *mut i8, a); } -/// Store the lower double-precision (64-bit) floating-point element from `a` +/// Stores the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// @@ -2639,7 +2639,7 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = b; } -/// Store the lower double-precision (64-bit) floating-point element from `a` +/// Stores the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// @@ -2653,7 +2653,7 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = b; } -/// Store 2 double-precision (64-bit) floating-point elements from `a` into +/// Stores 2 double-precision (64-bit) floating-point elements from `a` into /// memory in reverse order. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. @@ -2692,7 +2692,7 @@ pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0); } -/// Load a double-precision (64-bit) floating-point element from memory +/// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd) @@ -2705,7 +2705,7 @@ pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { _mm_setr_pd(d, d) } -/// Load a double-precision (64-bit) floating-point element from memory +/// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1) @@ -2717,7 +2717,7 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { _mm_load1_pd(mem_addr) } -/// Load 2 double-precision (64-bit) floating-point elements from memory into +/// Loads 2 double-precision (64-bit) floating-point elements from memory into /// the returned vector in reverse order. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. 
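A minimal round-trip sketch (not from the patch) of the unaligned store documented above together with its unaligned load counterpart; the buffer is purely illustrative.

```
#[cfg(target_arch = "x86_64")]
fn store_load_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let v = _mm_setr_pd(1.0, 2.0);
        let mut buf = [0.0f64; 2];
        // `_mm_storeu_pd`/`_mm_loadu_pd` carry no alignment requirement,
        // unlike `_mm_store_pd`/`_mm_load_pd`.
        _mm_storeu_pd(buf.as_mut_ptr(), v);
        assert_eq!(buf, [1.0, 2.0]);
        let r = _mm_loadu_pd(buf.as_ptr());
        assert_eq!(_mm_cvtsd_f64(r), 1.0); // low lane round-trips
    }
}
```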
/// @@ -2731,7 +2731,7 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { simd_shuffle2(a, a, [1, 0]) } -/// Load 128-bits (composed of 2 packed double-precision (64-bit) +/// Loads 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -2848,7 +2848,7 @@ pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { mem::transmute(a) } -/// Return vector of type __m128d with undefined elements. +/// Returns vector of type __m128d with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd) #[inline] @@ -2859,7 +2859,7 @@ pub unsafe fn _mm_undefined_pd() -> __m128d { mem::MaybeUninit::<__m128d>::uninitialized().into_initialized() } -/// Return vector of type __m128i with undefined elements. +/// Returns vector of type __m128i with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128) #[inline] @@ -4797,7 +4797,7 @@ mod tests { let mut ofs = 0; let mut p = vals.as_mut_ptr(); - // Make sure p is *not* aligned to 16-byte boundary + // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; p = p.offset(1); diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs index 394909763da..d78cd43b993 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs @@ -31,7 +31,7 @@ pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { addsubpd(a, b) } -/// Horizontally add adjacent pairs of double-precision (64-bit) +/// Horizontally adds adjacent pairs of double-precision (64-bit) /// floating-point elements in `a` and `b`, and pack the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd) @@ -43,7 +43,7 @@ pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { haddpd(a, b) } -/// Horizontally add adjacent pairs of single-precision (32-bit) +/// Horizontally adds adjacent pairs of single-precision (32-bit) /// floating-point elements in `a` and `b`, and pack the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps) @@ -67,7 +67,7 @@ pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { hsubpd(a, b) } -/// Horizontally add adjacent pairs of single-precision (32-bit) +/// Horizontally adds adjacent pairs of single-precision (32-bit) /// floating-point elements in `a` and `b`, and pack the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps) @@ -79,7 +79,7 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { hsubps(a, b) } -/// Load 128-bits of integer data from unaligned memory. +/// Loads 128-bits of integer data from unaligned memory. /// This intrinsic may perform better than `_mm_loadu_si128` /// when the data crosses a cache line boundary. /// @@ -104,7 +104,7 @@ pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d { simd_shuffle2(a, a, [0, 0]) } -/// Load a double-precision (64-bit) floating-point element from memory +/// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of return vector. 
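Illustrative only (not in the diff): the SSE3 horizontal add documented above, guarded by run-time detection because SSE3 is not part of the x86_64 baseline.

```
#[cfg(target_arch = "x86_64")]
fn hadd_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse3") {
        unsafe {
            let a = _mm_setr_pd(1.0, 2.0);
            let b = _mm_setr_pd(10.0, 20.0);
            // Result lane 0 = a[0] + a[1]; result lane 1 = b[0] + b[1].
            let h = _mm_hadd_pd(a, b);
            let mut out = [0.0f64; 2];
            _mm_storeu_pd(out.as_mut_ptr(), h);
            assert_eq!(out, [3.0, 30.0]);
        }
    }
}
```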
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd) diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 10ab260d8f5..027c268de72 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -154,7 +154,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 { constify_imm4!(imm4, call) } -/// Extract a single-precision (32-bit) floating-point element from `a`, +/// Extracts a single-precision (32-bit) floating-point element from `a`, /// selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps) @@ -170,7 +170,7 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 { mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11)) } -/// Extract an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. @@ -186,7 +186,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32 } -/// Extract an 32-bit integer from `a` selected with `imm8` +/// Extracts an 32-bit integer from `a` selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32) #[inline] @@ -240,7 +240,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { constify_imm8!(imm8, call) } -/// Return a copy of `a` with the 8-bit integer from `i` inserted at a +/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8) @@ -253,7 +253,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8)) } -/// Return a copy of `a` with the 32-bit integer from `i` inserted at a +/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32) @@ -266,7 +266,7 @@ pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i)) } -/// Compare packed 8-bit integers in `a` and `b` and return packed maximum +/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum /// values in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8) @@ -278,7 +278,7 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsb(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed /// maximum. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16) @@ -290,7 +290,7 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxuw(a.as_u16x8(), b.as_u16x8())) } -/// Compare packed 32-bit integers in `a` and `b`, and return packed maximum +/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum /// values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32) @@ -302,7 +302,7 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsd(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32) @@ -314,7 +314,7 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxud(a.as_u32x4(), b.as_u32x4())) } -/// Compare packed 8-bit integers in `a` and `b` and return packed minimum +/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum /// values in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8) @@ -326,7 +326,7 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsb(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed /// minimum. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16) @@ -338,7 +338,7 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminuw(a.as_u16x8(), b.as_u16x8())) } -/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum +/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum /// values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32) @@ -350,7 +350,7 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsd(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed /// minimum values. 
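Not from the patch: a short sketch of the SSE4.1 packed min/max intrinsics covered above; `_mm_cvtsi128_si32` reads lane 0 so the example needs no immediate operands.

```
#[cfg(target_arch = "x86_64")]
fn minmax_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse4.1") {
        unsafe {
            let a = _mm_setr_epi32(1, -5, 7, 3);
            let b = _mm_setr_epi32(2, -6, 4, 3);
            // Lane-wise signed maximum and minimum; check lane 0 of each.
            assert_eq!(_mm_cvtsi128_si32(_mm_max_epi32(a, b)), 2);
            assert_eq!(_mm_cvtsi128_si32(_mm_min_epi32(a, b)), 1);
        }
    }
}
```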
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32) @@ -362,7 +362,7 @@ pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminud(a.as_u32x4(), b.as_u32x4())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32) @@ -374,7 +374,7 @@ pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed 64-bit integers in `a` and `b` for equality +/// Compares packed 64-bit integers in `a` and `b` for equality /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64) #[inline] @@ -464,7 +464,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i64x2>(a)) } -/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16) #[inline] @@ -477,7 +477,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i16x8>(a)) } -/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32) #[inline] @@ -490,7 +490,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i32x4>(a)) } -/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64) #[inline] @@ -503,7 +503,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i64x2>(a)) } -/// Zero extend packed unsigned 16-bit integers in `a` +/// Zeroes extend packed unsigned 16-bit integers in `a` /// to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32) @@ -517,7 +517,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i32x4>(a)) } -/// Zero extend packed unsigned 16-bit integers in `a` +/// Zeroes extend packed unsigned 16-bit integers in `a` /// to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64) @@ -531,7 +531,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i64x2>(a)) } -/// Zero extend packed unsigned 32-bit integers in `a` +/// Zeroes extend packed unsigned 32-bit integers in `a` /// to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64) @@ -592,7 +592,7 @@ pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { } /// Round the packed double-precision (64-bit) floating-point elements in `a` -/// down to an integer value, and store 
the results as packed double-precision +/// down to an integer value, and stores the results as packed double-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd) @@ -605,7 +605,7 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { } /// Round the packed single-precision (32-bit) floating-point elements in `a` -/// down to an integer value, and store the results as packed single-precision +/// down to an integer value, and stores the results as packed single-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps) @@ -620,7 +620,7 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { /// Round the lower double-precision (64-bit) floating-point element in `b` /// down to an integer value, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper element from `a` to the upper element of the intrinsic +/// and copies the upper element from `a` to the upper element of the intrinsic /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd) @@ -635,7 +635,7 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { /// Round the lower single-precision (32-bit) floating-point element in `b` /// down to an integer value, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper 3 packed elements from `a` to the upper elements +/// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss) @@ -648,7 +648,7 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { } /// Round the packed double-precision (64-bit) floating-point elements in `a` -/// up to an integer value, and store the results as packed double-precision +/// up to an integer value, and stores the results as packed double-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd) @@ -661,7 +661,7 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { } /// Round the packed single-precision (32-bit) floating-point elements in `a` -/// up to an integer value, and store the results as packed single-precision +/// up to an integer value, and stores the results as packed single-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps) @@ -676,7 +676,7 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { /// Round the lower double-precision (64-bit) floating-point element in `b` /// up to an integer value, store the result as a double-precision /// floating-point element in the lower element of the intrisic result, -/// and copy the upper element from `a` to the upper element +/// and copies the upper element from `a` to the upper element /// of the intrinsic result. 
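A hedged sketch (not part of the patch) of the SSE4.1 packed floor/ceil rounding described above; the helper name is illustrative.

```
#[cfg(target_arch = "x86_64")]
fn floor_ceil_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse4.1") {
        unsafe {
            let v = _mm_setr_pd(1.2, -1.2);
            let (mut f, mut c) = ([0.0f64; 2], [0.0f64; 2]);
            _mm_storeu_pd(f.as_mut_ptr(), _mm_floor_pd(v));
            _mm_storeu_pd(c.as_mut_ptr(), _mm_ceil_pd(v));
            assert_eq!(f, [1.0, -2.0]); // rounded toward negative infinity
            assert_eq!(c, [2.0, -1.0]); // rounded toward positive infinity
        }
    }
}
```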
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd) @@ -691,7 +691,7 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { /// Round the lower single-precision (32-bit) floating-point element in `b` /// up to an integer value, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper 3 packed elements from `a` to the upper elements +/// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss) @@ -704,7 +704,7 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { } /// Round the packed double-precision (64-bit) floating-point elements in `a` -/// using the `rounding` parameter, and store the results as packed +/// using the `rounding` parameter, and stores the results as packed /// double-precision floating-point elements. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -749,7 +749,7 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d { } /// Round the packed single-precision (32-bit) floating-point elements in `a` -/// using the `rounding` parameter, and store the results as packed +/// using the `rounding` parameter, and stores the results as packed /// single-precision floating-point elements. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -796,7 +796,7 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 { /// Round the lower double-precision (64-bit) floating-point element in `b` /// using the `rounding` parameter, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper element from `a` to the upper element of the intrinsic +/// and copies the upper element from `a` to the upper element of the intrinsic /// result. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -843,7 +843,7 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { /// Round the lower single-precision (32-bit) floating-point element in `b` /// using the `rounding` parameter, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper 3 packed elements from `a` to the upper elements +/// and copies the upper 3 packed elements from `a` to the upper elements /// of the instrinsic result. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -916,8 +916,8 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { mem::transmute(phminposuw(a.as_u16x8())) } -/// Multiply the low 32-bit integers from each packed 64-bit -/// element in `a` and `b`, and return the signed 64-bit result. +/// Multiplies the low 32-bit integers from each packed 64-bit +/// element in `a` and `b`, and returns the signed 64-bit result. 
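Not in the diff: a sketch of `_mm_mul_epi32` as just described, which multiplies only the low 32-bit integer of each 64-bit element and widens the products to 64 bits.

```
#[cfg(target_arch = "x86_64")]
fn widening_mul_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse4.1") {
        unsafe {
            let a = _mm_setr_epi32(-3, 0, 7, 0);
            let b = _mm_setr_epi32(4, 0, -5, 0);
            // 64-bit lanes of the product: [-3 * 4, 7 * -5] = [-12, -35].
            let p = _mm_mul_epi32(a, b);
            assert_eq!(_mm_cvtsi128_si64(p), -12); // low 64-bit lane
        }
    }
}
```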
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32) #[inline] @@ -928,7 +928,7 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmuldq(a.as_i32x4(), b.as_i32x4())) } -/// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate +/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate /// 64-bit integers, and returns the lowest 32-bit, whatever they might be, /// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs index 5862673781f..0eae371e344 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs @@ -39,13 +39,13 @@ pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; /// Do not negate results *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; -/// Negate results +/// Negates results #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; /// Do not negate results before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; -/// Negate results only before the end of the string +/// Negates results only before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; @@ -63,7 +63,7 @@ pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm) @@ -83,7 +83,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { mem::transmute(constify_imm8!(imm8, call)) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8` and return the generated index. Similar to /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the /// lengths of `a` and `b` to be explicitly specified. @@ -115,7 +115,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// /// # Examples /// -/// Find a substring using [`_SIDD_CMP_EQUAL_ORDERED`] +/// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ``` /// #[cfg(target_arch = "x86")] @@ -187,7 +187,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # } /// ``` /// -/// Find the index of the first character in the haystack that is within a +/// Finds the index of the first character in the haystack that is within a /// range of characters. /// /// ``` @@ -291,7 +291,7 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if any character in `b` was null. 
/// and `0` otherwise. /// @@ -312,7 +312,7 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if the resulting mask was non-zero, /// and `0` otherwise. /// @@ -333,7 +333,7 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and returns `1` if any character in `a` was null, /// and `0` otherwise. /// @@ -354,7 +354,7 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return bit `0` of the resulting bit mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro) @@ -374,7 +374,7 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if `b` did not contain a null /// character and the resulting mask was zero, and `0` otherwise. /// @@ -395,7 +395,7 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm) @@ -415,7 +415,7 @@ pub unsafe fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) mem::transmute(constify_imm8!(imm8, call)) } -/// Compare packed strings `a` and `b` with lengths `la` and `lb` using the +/// Compares packed strings `a` and `b` with lengths `la` and `lb` using the /// control in `imm8` and return the generated index. Similar to /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly /// determines the length of `a` and `b`. @@ -510,7 +510,7 @@ pub unsafe fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// `b` was null, and `0` otherwise. /// @@ -531,7 +531,7 @@ pub unsafe fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if the resulting mask /// was non-zero, and `0` otherwise. 
/// @@ -552,7 +552,7 @@ pub unsafe fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// a was null, and `0` otherwise. /// @@ -573,7 +573,7 @@ pub unsafe fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return bit `0` of the resulting /// bit mask. /// @@ -594,7 +594,7 @@ pub unsafe fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if `b` did not /// contain a null character and the resulting mask was zero, and `0` /// otherwise. @@ -652,7 +652,7 @@ pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { crc32_32_32(crc, v) } -/// Compare packed 64-bit integers in `a` and `b` for greater-than, +/// Compares packed 64-bit integers in `a` and `b` for greater-than, /// return the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64) diff --git a/library/stdarch/crates/core_arch/src/x86/ssse3.rs b/library/stdarch/crates/core_arch/src/x86/ssse3.rs index a013ab6551a..c3d78571a5e 100644 --- a/library/stdarch/crates/core_arch/src/x86/ssse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/ssse3.rs @@ -8,7 +8,7 @@ use mem; #[cfg(test)] use stdsimd_test::assert_instr; -/// Compute the absolute value of packed 8-bit signed integers in `a` and +/// Computes the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8) @@ -20,7 +20,7 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { mem::transmute(pabsb128(a.as_i8x16())) } -/// Compute the absolute value of each of the packed 16-bit signed integers in +/// Computes the absolute value of each of the packed 16-bit signed integers in /// `a` and /// return the 16-bit unsigned integer /// @@ -33,7 +33,7 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { mem::transmute(pabsw128(a.as_i16x8())) } -/// Compute the absolute value of each of the packed 32-bit signed integers in +/// Computes the absolute value of each of the packed 32-bit signed integers in /// `a` and /// return the 32-bit unsigned integer /// @@ -46,7 +46,7 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i { mem::transmute(pabsd128(a.as_i32x4())) } -/// Shuffle bytes from `a` according to the content of `b`. +/// Shuffles bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses /// into the 16 bytes of `a`. @@ -81,7 +81,7 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i { } /// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result, -/// shift the result right by `n` bytes, and return the low 16 bytes. +/// shift the result right by `n` bytes, and returns the low 16 bytes. 
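Illustration only (not from the patch): `_mm_shuffle_epi8` as documented above, using the low four bits of each control byte as a source index, here to reverse the sixteen bytes of `a`.

```
#[cfg(target_arch = "x86_64")]
fn shuffle_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("ssse3") {
        unsafe {
            let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
            let idx = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
            let rev = _mm_shuffle_epi8(a, idx);
            // Byte 0 of the result is byte 15 of `a`.
            assert_eq!(_mm_cvtsi128_si32(rev) & 0xff, 15);
        }
    }
}
```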
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8) #[inline] @@ -154,7 +154,7 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i { mem::transmute(r) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[8 x i16]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16) @@ -166,7 +166,7 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. /// @@ -179,7 +179,7 @@ pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[4 x i32]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32) @@ -229,7 +229,7 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } -/// Multiply corresponding pairs of packed 8-bit unsigned integer +/// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, add pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to @@ -244,7 +244,7 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) } -/// Multiply packed 16-bit signed integer values, truncate the 32-bit +/// Multiplies packed 16-bit signed integer values, truncate the 32-bit /// product to the 18 most significant bits by right-shifting, round the /// truncated value by adding 1, and write bits `[16:1]` to the destination. /// @@ -257,8 +257,8 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) } -/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit -/// integer in `b` is negative, and return the result. +/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit +/// integer in `b` is negative, and returns the result. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. /// @@ -271,8 +271,8 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16())) } -/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit +/// integer in `b` is negative, and returns the results. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. 
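Not from the patch: a sketch of the `_mm_sign_*` family described above, which negates, passes through, or zeroes each lane of `a` depending on the sign of the matching lane in `b`.

```
#[cfg(target_arch = "x86_64")]
fn sign_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("ssse3") {
        unsafe {
            let a = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
            let b = _mm_setr_epi16(-1, 0, 1, -1, 0, 1, -1, 0);
            let r = _mm_sign_epi16(a, b);
            // Lane 0 of `b` is negative, so lane 0 of `a` is negated.
            assert_eq!(_mm_cvtsi128_si32(r) as i16, -10);
        }
    }
}
```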
/// @@ -285,8 +285,8 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8())) } -/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` /// is zero. /// @@ -299,7 +299,7 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } -/// Compute the absolute value of packed 8-bit integers in `a` and +/// Computes the absolute value of packed 8-bit integers in `a` and /// return the unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -308,7 +308,7 @@ pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { pabsb(a) } -/// Compute the absolute value of packed 8-bit integers in `a`, and return the +/// Computes the absolute value of packed 8-bit integers in `a`, and returns the /// unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -317,7 +317,7 @@ pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { pabsw(a) } -/// Compute the absolute value of packed 32-bit integers in `a`, and return the +/// Computes the absolute value of packed 32-bit integers in `a`, and returns the /// unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -326,8 +326,8 @@ pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { pabsd(a) } -/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in -/// the corresponding 8-bit element of `b`, and return the results +/// Shuffles packed 8-bit integers in `a` according to shuffle control mask in +/// the corresponding 8-bit element of `b`, and returns the results #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pshufb))] @@ -350,7 +350,7 @@ pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { constify_imm8!(n, call) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[4 x i16]`. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -359,7 +359,7 @@ pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { phaddw(a, b) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[2 x i32]`. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -368,7 +368,7 @@ pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { phaddd(a, b) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. #[inline] @@ -429,8 +429,8 @@ pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { pmulhrsw(a, b) } -/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. 
#[inline] @@ -440,8 +440,8 @@ pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { psignb(a, b) } -/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline] @@ -451,8 +451,8 @@ pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { psignw(a, b) } -/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index 4c7f5338a96..35f00e230b5 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -23,7 +23,7 @@ extern "C" { fn xrstors(p: *const u8, hi: u32, lo: u32) -> (); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and XCR0. @@ -41,7 +41,7 @@ pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) { xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using +/// Performs a full or partial restore of the enabled processor states using /// the state information stored in memory at `mem_addr`. /// /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and @@ -63,7 +63,7 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { #[stable(feature = "simd_x86", since = "1.27.0")] pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; -/// Copy 64-bits from `val` to the extended control register (`XCR`) specified +/// Copies 64-bits from `val` to the extended control register (`XCR`) specified /// by `a`. /// /// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported. @@ -92,7 +92,7 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { ((edx as u64) << 32) | (eax as u64) } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. @@ -109,7 +109,7 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) { xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory +/// Performs a full or partial save of the enabled processor states to memory /// at `mem_addr`. 
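A hedged sketch (not part of the diff) of reading `XCR0` with `_xgetbv`, documented above, to check that the OS saves SSE and AVX register state; the bit positions (1 for XMM, 2 for YMM) are taken from the Intel SDM and the helper name is illustrative.

```
#[cfg(target_arch = "x86_64")]
fn os_saves_avx_state() -> bool {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("xsave") {
        // Assumes the OS has set CR4.OSXSAVE, which mainstream OSes do.
        // XCR0 bit 1 = XMM state, bit 2 = YMM state.
        let xcr0 = unsafe { _xgetbv(_XCR_XFEATURE_ENABLED_MASK) };
        (xcr0 & 0b110) == 0b110
    } else {
        false
    }
}
```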
/// /// `xsavec` differs from `xsave` in that it uses compaction and that it may @@ -125,7 +125,7 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) { xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr` /// /// `xsaves` differs from xsave in that it can save state components @@ -142,7 +142,7 @@ pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) { xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using the +/// Performs a full or partial restore of the enabled processor states using the /// state information stored in memory at `mem_addr`. /// /// `xrstors` differs from `xrstor` in that it can restore state components diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs index 812fff0b865..761b0e25e57 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -11,8 +11,8 @@ extern "unadjusted" { fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64); } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry flag), and store the unsigned 64-bit result in out, and the carry-out +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry flag), and store the unsigned 64-bit result in `out`, and the carry-out /// is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(adc))] @@ -23,8 +23,8 @@ pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { a } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 64-bit result in out, and +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[target_feature(enable = "adx")] @@ -35,8 +35,8 @@ pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { llvm_addcarryx_u64(c_in, a, b, out as *mut _ as *mut u8) } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 64-bit result in out, and +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`. +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(sbb))] diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs index 429ee75c59a..bd4414a733b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs @@ -17,7 +17,7 @@ use core_arch::simd_llvm::*; use core_arch::x86::*; use mem; -/// Copy `a` to result, and insert the 64-bit integer `i` into result +/// Copies `a` to result, and insert the 64-bit integer `i` into result /// at the location specified by `index`. 
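Not part of the patch: a minimal sketch of chaining `_addcarry_u64`, documented above, to add two 128-bit integers held as little-endian `u64` limbs (the `add128` helper is a made-up name).

```
#[cfg(target_arch = "x86_64")]
fn add128(a: [u64; 2], b: [u64; 2]) -> ([u64; 2], u8) {
    use std::arch::x86_64::_addcarry_u64;
    let (mut lo, mut hi) = (0u64, 0u64);
    unsafe {
        // The returned u8 is the carry-out, fed into the next limb.
        let c = _addcarry_u64(0, a[0], b[0], &mut lo);
        let c = _addcarry_u64(c, a[1], b[1], &mut hi);
        ([lo, hi], c)
    }
}
```

For instance, `add128([u64::MAX, 0], [1, 0])` yields `([0, 1], 0)`: the low limb wraps and the carry propagates into the high limb.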
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64) diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs index a27f3125751..deab866f721 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs @@ -21,7 +21,7 @@ use core_arch::simd_llvm::*; use core_arch::x86::*; -/// Extract a 64-bit integer from `a`, selected with `imm8`. +/// Extracts a 64-bit integer from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs index be3ced9e251..dc86cb0cb22 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -52,7 +52,7 @@ pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } -/// Extract lowest set isolated bit. +/// Extracts lowest set isolated bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64) #[inline] @@ -64,7 +64,7 @@ pub unsafe fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } -/// Get mask up to lowest set bit. +/// Gets mask up to lowest set bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs index 5e0ab83f681..4b1d1931360 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs @@ -30,7 +30,7 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { result as u64 } -/// Zero higher bits of `a` >= `index`. +/// Zeroes higher bits of `a` >= `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index ba121d67006..6a94a42fc60 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -5,7 +5,7 @@ #[cfg(test)] use stdsimd_test::assert_instr; -/// Return an integer with the reversed byte order of x +/// Returns an integer with the reversed byte order of x /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index 822bfc2fb3d..c48ed7525ba 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -3,7 +3,7 @@ use sync::atomic::Ordering; #[cfg(test)] use stdsimd_test::assert_instr; -/// Compare and exchange 16 bytes (128 bits) of data atomically. +/// Compares and exchange 16 bytes (128 bits) of data atomically. /// /// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64` /// processors. 
It performs an atomic compare-and-swap, updating the `ptr` diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index aef4f638ebe..40b781d4062 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -16,7 +16,7 @@ extern "unadjusted" { use stdsimd_test::assert_instr; /// Read a hardware generated 64-bit random value and store the result in val. -/// Return 1 if a random value was generated, and 0 otherwise. +/// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs index a3126e72e94..765bd7b9467 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs @@ -15,7 +15,7 @@ extern "C" { fn cvtsi642ss(a: __m128, b: i64) -> __m128; } -/// Convert the lowest 32 bit float in the input vector to a 64 bit integer. +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer. /// /// The result is rounded according to the current rounding mode. If the result /// cannot be represented as a 64 bit integer the result will be @@ -34,7 +34,7 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { cvtss2si64(a) } -/// Convert the lowest 32 bit float in the input vector to a 64 bit integer +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer /// with truncation. /// /// The result is rounded always using truncation (round towards zero). If the @@ -53,7 +53,7 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { cvttss2si64(a) } -/// Convert a 64 bit integer to a 32 bit float. The result vector is the input +/// Converts a 64 bit integer to a 32 bit float. The result vector is the input /// vector `a` with the lowest 32 bit float replaced by the converted integer. /// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs index 779be0a5930..570a75236ae 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -15,7 +15,7 @@ extern "C" { fn cvttsd2si64(a: __m128d) -> i64; } -/// Convert the lower double-precision (64-bit) floating-point element in a to +/// Converts the lower double-precision (64-bit) floating-point element in a to /// a 64-bit integer. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64) @@ -38,7 +38,7 @@ pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { _mm_cvtsd_si64(a) } -/// Convert the lower double-precision (64-bit) floating-point element in `a` +/// Converts the lower double-precision (64-bit) floating-point element in `a` /// to a 64-bit integer with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64) @@ -74,7 +74,7 @@ pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { intrinsics::nontemporal_store(mem_addr, a); } -/// Return a vector whose lowest element is `a` and all higher elements are +/// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128) @@ -86,7 +86,7 @@ pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { _mm_set_epi64x(0, a) } -/// Return a vector whose lowest element is `a` and all higher elements are +/// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128) @@ -98,7 +98,7 @@ pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { _mm_cvtsi64_si128(a) } -/// Return the lowest element of `a`. +/// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64) #[inline] @@ -109,7 +109,7 @@ pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { simd_extract(a.as_i64x2(), 0) } -/// Return the lowest element of `a`. +/// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x) #[inline] @@ -120,7 +120,7 @@ pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { _mm_cvtsi128_si64(a) } -/// Return `a` with its lower element replaced by `b` after converting it to +/// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd) @@ -132,7 +132,7 @@ pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { simd_insert(a, 0, b as f64) } -/// Return `a` with its lower element replaced by `b` after converting it to +/// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd) diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs index 9a22370019c..cbad2609cd1 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs @@ -7,7 +7,7 @@ use mem; #[cfg(test)] use stdsimd_test::assert_instr; -/// Extract an 64-bit integer from `a` selected with `imm8` +/// Extracts an 64-bit integer from `a` selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64) #[inline] @@ -20,7 +20,7 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 { simd_extract(a.as_i64x2(), imm8) } -/// Return a copy of `a` with the 64-bit integer from `i` inserted at a +/// Returns a copy of `a` with the 64-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64) diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 7531ac5832e..50f1f5481b3 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -21,7 +21,7 @@ extern "C" { fn xrstors64(p: *const u8, hi: u32, lo: u32) -> (); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and XCR0. 
@@ -39,7 +39,7 @@ pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using +/// Performs a full or partial restore of the enabled processor states using /// the state information stored in memory at `mem_addr`. /// /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and @@ -55,7 +55,7 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. @@ -72,7 +72,7 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) { xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory +/// Performs a full or partial save of the enabled processor states to memory /// at `mem_addr`. /// /// `xsavec` differs from `xsave` in that it uses compaction and that it may @@ -88,7 +88,7 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) { xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr` /// /// `xsaves` differs from xsave in that it can save state components @@ -105,7 +105,7 @@ pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) { xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using the +/// Performs a full or partial restore of the enabled processor states using the /// state information stored in memory at `mem_addr`. /// /// `xrstors` differs from `xrstor` in that it can restore state components diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 6a3e11de3df..580d7111bdd 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -6,9 +6,8 @@ //! whether a feature is available (bit is set) or unavaiable (bit is cleared). //! //! The enum `Feature` is used to map bit positions to feature names, and the -//! the `__crate::detect::check_for!` macro is used to map string literals (e.g. -//! "avx") to these bit positions (e.g. `Feature::avx`). -//! +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g., +//! "avx") to these bit positions (e.g., `Feature::avx`). //! //! The run-time feature detection is performed by the //! `__crate::detect::check_for(Feature) -> bool` function. On its first call, @@ -20,7 +19,7 @@ /// x86/x86-64 platforms. /// /// This macro is provided in the standard library and will detect at runtime -/// whether the specified CPU feature is detected. This does *not* resolve at +/// whether the specified CPU feature is detected. This does **not** resolve at /// compile time unless the specified feature is already enabled for the entire /// crate. Runtime detection currently relies mostly on the `cpuid` instruction. 
/// diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index ee1914875cf..ac1f3e4fae7 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -147,7 +147,7 @@ impl Cache { } } -/// Test the `bit` of the storage. If the storage has not been initialized, +/// Tests the `bit` of the storage. If the storage has not been initialized, /// initializes it with the result of `f()`. /// /// On its first invocation, it detects the CPU features and caches them in the diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index 6ccdbbc88a8..5d935a26c89 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -134,7 +134,7 @@ fn getauxval(key: usize) -> Result { fn auxv_from_file(file: &str) -> Result { let mut file = File::open(file).map_err(|_| ())?; - // See https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h + // See <https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h>. // // The auxiliary vector contains at most 32 (key,value) fields: from // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of diff --git a/library/stdarch/crates/stdsimd-test/src/lib.rs b/library/stdarch/crates/stdsimd-test/src/lib.rs index dec44401d9b..5c9b27f669a 100644 --- a/library/stdarch/crates/stdsimd-test/src/lib.rs +++ b/library/stdarch/crates/stdsimd-test/src/lib.rs @@ -23,7 +23,7 @@ pub use assert_instr_macro::*; pub use simd_test_macro::*; use std::{collections::HashMap, env, str}; -// println! doesn't work on wasm32 right now, so shadow the compiler's println! +// `println!` doesn't work on wasm32 right now, so shadow the compiler's `println!` // macro with our own shim that redirects to `console.log`. #[allow(unused)] #[cfg(target_arch = "wasm32")] @@ -64,7 +64,7 @@ fn normalize(symbol: &str) -> String { None => symbol.to_string(), }; // Normalize to no leading underscore to handle platforms that may - // inject extra ones in symbol names + // inject extra ones in symbol names. while ret.starts_with("_") { ret.remove(0); } @@ -90,9 +90,9 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // function, returning if we do indeed find it. let mut found = false; for instr in instrs { - // Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax + // Get the first instruction, e.g., tzcntl in tzcntl %rax,%rax. if let Some(part) = instr.parts.get(0) { - // Truncates the instruction with the length of the expected + // Truncate the instruction with the length of the expected // instruction: tzcntl => tzcnt and compares that. if part.starts_with(expected) { found = true; @@ -136,17 +136,15 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { .ok() .map_or_else( || match expected { - // cpuid returns a pretty big aggregate structure so exempt it - // from the slightly more restrictive 22 - // instructions below + // `cpuid` returns a pretty big aggregate structure, so exempt + // it from the slightly more restrictive 22 instructions below. "cpuid" => 30, - // Apparently on Windows LLVM generates a bunch of - // saves/restores of xmm registers around these - // intstructions which blows the 20 limit - // below. As it seems dictates by Windows's abi - // (I guess?) we probably can't do much - // about it...
+ // Apparently, on Windows, LLVM generates a bunch of + // saves/restores of xmm registers around these intstructions, + // which exceeds the limit of 20 below. As it seems dictated by + // Windows's ABI (I believe?), we probably can't do much + // about it. "vzeroall" | "vzeroupper" if cfg!(windows) => 30, // Intrinsics using `cvtpi2ps` are typically "composites" and @@ -154,9 +152,8 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { "cvtpi2ps" => 25, // Original limit was 20 instructions, but ARM DSP Intrinsics - // are exactly 20 instructions long. So bump - // the limit to 22 instead of adding here a - // long list of exceptions. + // are exactly 20 instructions long. So, bump the limit to 22 + // instead of adding here a long list of exceptions. _ => 22, }, |v| v.parse().unwrap(), -- cgit 1.4.1-3-g733a5 From b322f1c03aa5de204317163322b5e22a96078d0d Mon Sep 17 00:00:00 2001 From: Alexander Regueiro Date: Mon, 18 Feb 2019 15:36:32 +0000 Subject: A few cosmetic improvements. --- library/stdarch/crates/core_arch/src/x86/bmi1.rs | 6 +++--- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 +- library/stdarch/crates/core_arch/src/x86/sse.rs | 12 ++++++------ library/stdarch/crates/core_arch/src/x86/tbm.rs | 4 ++-- library/stdarch/crates/core_arch/src/x86_64/bmi.rs | 6 +++--- .../stdarch/crates/std_detect/src/detect/os/linux/mips.rs | 4 ++-- library/stdarch/crates/stdsimd-verify/tests/arm.rs | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs index 47a62d1f64b..1e37496a441 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi1.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -87,7 +87,7 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32) #[inline] @@ -100,7 +100,7 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32) #[inline] @@ -164,7 +164,7 @@ mod tests { #[simd_test(enable = "bmi1")] unsafe fn test_blsr_u32() { - // TODO: test the behavior when the input is 0 + // TODO: test the behavior when the input is `0`. let r = _blsr_u32(0b0011_0000u32); assert_eq!(r, 0b0010_0000u32); } diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index d796995ad38..4b639619f28 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -148,7 +148,7 @@ pub fn has_cpuid() -> bool { // was modified or not. // // If the result is zero, the cpuid bit was not modified. 
- // If the result is 0x200000 (non-zero), then the cpuid + // If the result is `0x200000` (non-zero), then the cpuid // was correctly modified and the CPU supports the cpuid // instruction: (result & 0x200000) != 0 diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 96ea08697c0..8e2053da5e4 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -1126,8 +1126,8 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { all(test, target_arch = "x86", not(target_feature = "sse2")), assert_instr(shufps) )] -// TODO: This function is actually not limited to floats, but that's what -// what matches the C type most closely: (__m128, *const __m64) -> __m128 +// TODO: this function is actually not limited to floats, but that's what +// what matches the C type most closely: `(__m128, *const __m64) -> __m128`. pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { let q = p as *const f32x2; let b: f32x2 = *q; @@ -2832,7 +2832,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnlt_ss() { - // TODO: This test is exactly the same as for _mm_cmpge_ss, but there + // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. @@ -2861,7 +2861,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnle_ss() { - // TODO: This test is exactly the same as for _mm_cmpgt_ss, but there + // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there // must be a difference. It may have to do with behavior in the // presence // of NaNs (signaling or quiet). If so, we should add tests for those. @@ -2890,7 +2890,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpngt_ss() { - // TODO: This test is exactly the same as for _mm_cmple_ss, but there + // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. @@ -2919,7 +2919,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnge_ss() { - // TODO: This test is exactly the same as for _mm_cmplt_ss, but there + // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. diff --git a/library/stdarch/crates/core_arch/src/x86/tbm.rs b/library/stdarch/crates/core_arch/src/x86/tbm.rs index 314c5e36c7f..840974df9c2 100644 --- a/library/stdarch/crates/core_arch/src/x86/tbm.rs +++ b/library/stdarch/crates/core_arch/src/x86/tbm.rs @@ -230,7 +230,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 { /// Clears all bits below the least significant zero of `x` and sets all other /// bits. /// -/// If the least significant bit of `x` is 0, it sets all bits. +/// If the least significant bit of `x` is `0`, it sets all bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] @@ -242,7 +242,7 @@ pub unsafe fn _t1mskc_u32(x: u32) -> u32 { /// Clears all bits below the least significant zero of `x` and sets all other /// bits. /// -/// If the least significant bit of `x` is 0, it sets all bits. +/// If the least significant bit of `x` is `0`, it sets all bits. 
#[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs index dc86cb0cb22..c8c0f31d80f 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -92,7 +92,7 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64) #[inline] @@ -105,7 +105,7 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64) #[inline] @@ -170,7 +170,7 @@ mod tests { #[simd_test(enable = "bmi1")] unsafe fn test_blsr_u64() { - // TODO: test the behavior when the input is 0 + // TODO: test the behavior when the input is `0`. let r = _blsr_u64(0b0011_0000u64); assert_eq!(r, 0b0010_0000u64); } diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs index 7c180326feb..c0a5fb2e5d8 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -10,7 +10,7 @@ pub fn check_for(x: Feature) -> bool { } /// Try to read the features from the auxiliary vector, and if that fails, try -/// to read them from /proc/cpuinfo. +/// to read them from `/proc/cpuinfo`. fn detect_features() -> cache::Initializer { let mut value = cache::Initializer::default(); let enable_feature = |value: &mut cache::Initializer, f, enable| { @@ -26,6 +26,6 @@ fn detect_features() -> cache::Initializer { enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); return value; } - // TODO: fall back via cpuinfo + // TODO: fall back via `cpuinfo`. value } diff --git a/library/stdarch/crates/stdsimd-verify/tests/arm.rs b/library/stdarch/crates/stdsimd-verify/tests/arm.rs index 32f64304ec7..1b418f3442e 100644 --- a/library/stdarch/crates/stdsimd-verify/tests/arm.rs +++ b/library/stdarch/crates/stdsimd-verify/tests/arm.rs @@ -206,7 +206,7 @@ fn verify_all_signatures() { Some(i) => i, None => { // Skip all these intrinsics as they're not listed in NEON - // descriptions online + // descriptions online. // // TODO: we still need to verify these intrinsics or find a // reference for them, need to figure out where though! @@ -290,7 +290,7 @@ fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> { } } - // TODO: verify `target_feature` + // TODO: verify `target_feature`. 
Ok(()) } -- cgit 1.4.1-3-g733a5 From 61414fdd629426386fa498c4339fc99a6ed45351 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 19 Feb 2019 23:16:07 +0900 Subject: Change imports in std_detect to edition-agnostic style --- library/stdarch/crates/std_detect/src/detect/cache.rs | 6 +++--- library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs | 2 +- library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs | 2 +- library/stdarch/crates/std_detect/src/detect/os/x86.rs | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index ac1f3e4fae7..422bd6817da 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -3,13 +3,13 @@ #![allow(dead_code)] // not used on all platforms -use sync::atomic::Ordering; +use crate::sync::atomic::Ordering; #[cfg(target_pointer_width = "64")] -use sync::atomic::AtomicU64; +use crate::sync::atomic::AtomicU64; #[cfg(target_pointer_width = "32")] -use sync::atomic::AtomicU32; +use crate::sync::atomic::AtomicU32; /// Sets the `bit` of `x`. #[inline] diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs index 1c73cefd47d..952b4baf0fd 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -5,7 +5,7 @@ cfg_if! { mod aarch64; pub use self::aarch64::check_for; } else { - use arch::detect::Feature; + use crate::arch::detect::Feature; /// Performs run-time feature detection. pub fn check_for(_x: Feature) -> bool { false diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index 5d935a26c89..07b6432eafd 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -2,7 +2,7 @@ #![cfg_attr(not(target_arch = "aarch64"), allow(dead_code))] #[cfg(feature = "std_detect_file_io")] -use ::{fs::File, io::Read}; +use crate::{fs::File, io::Read}; /// Key to access the CPU Hardware capabilities bitfield. pub(crate) const AT_HWCAP: usize = 16; diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 30199b1f44f..675601536fa 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -1,11 +1,11 @@ //! x86 run-time feature detection is OS independent. 
#[cfg(target_arch = "x86")] -use arch::x86::*; +use crate::arch::x86::*; #[cfg(target_arch = "x86_64")] -use arch::x86_64::*; +use crate::arch::x86_64::*; -use mem; +use crate::mem; use crate::detect::{Feature, cache, bit}; -- cgit 1.4.1-3-g733a5 From 5238498dbdabdda0e73aa4ab7abfab04022acee9 Mon Sep 17 00:00:00 2001 From: myfreeweb Date: Wed, 27 Feb 2019 21:34:33 +0300 Subject: aarch64: escape square brackets in docs To comply with deny(intra_doc_link_resolution_failure) --- library/stdarch/crates/std_detect/src/detect/os/aarch64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs index f28d15a7c3e..dfb8c87707f 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -6,7 +6,7 @@ //! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 //! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. //! Each part of the register indicates the level of support for a certain feature, e.g. -//! when ID_AA64ISAR0_EL1[7:4] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. //! //! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), //! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). -- cgit 1.4.1-3-g733a5 From eac0c6c8c1c4009fbc37bb54932e6240de005cb5 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 16 Apr 2019 10:50:30 +0200 Subject: Migrate clippy lints to tool lints --- library/stdarch/crates/core_arch/src/lib.rs | 33 +++++++++++--------------- library/stdarch/crates/std_detect/src/lib.rs | 7 ++---- library/stdarch/crates/stdsimd-test/src/lib.rs | 6 +---- 3 files changed, 17 insertions(+), 29 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index fb621808b60..ea9a6206728 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -35,25 +35,20 @@ external_doc )] #![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))] -#![cfg_attr( - feature = "cargo-clippy", - deny(clippy::missing_inline_in_public_items,) -)] -#![cfg_attr( - feature = "cargo-clippy", - allow( - clippy::inline_always, - clippy::too_many_arguments, - clippy::cast_sign_loss, - clippy::cast_lossless, - clippy::cast_possible_wrap, - clippy::cast_possible_truncation, - clippy::cast_precision_loss, - clippy::shadow_reuse, - clippy::cyclomatic_complexity, - clippy::similar_names, - clippy::many_single_char_names - ) +#![deny(clippy::missing_inline_in_public_items)] +#![allow( + clippy::inline_always, + clippy::too_many_arguments, + clippy::cast_sign_loss, + clippy::cast_lossless, + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_precision_loss, + clippy::shadow_reuse, + clippy::cyclomatic_complexity, + clippy::cognitive_complexity, + clippy::similar_names, + clippy::many_single_char_names )] #![cfg_attr(test, allow(unused_imports))] #![no_std] diff --git a/library/stdarch/crates/std_detect/src/lib.rs 
b/library/stdarch/crates/std_detect/src/lib.rs index da54cc73592..8b3bb304f1c 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -13,11 +13,8 @@ #![unstable(feature = "stdsimd", issue = "27731")] #![feature(const_fn, staged_api, stdsimd, doc_cfg, allow_internal_unstable)] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::shadow_reuse))] -#![cfg_attr( - feature = "cargo-clippy", - deny(clippy::missing_inline_in_public_items,) -)] +#![allow(clippy::shadow_reuse)] +#![deny(clippy::missing_inline_in_public_items)] #![cfg_attr(target_os = "linux", feature(linkage))] #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] #![cfg_attr(stdsimd_strict, deny(warnings))] diff --git a/library/stdarch/crates/stdsimd-test/src/lib.rs b/library/stdarch/crates/stdsimd-test/src/lib.rs index af1a9a28069..d20d3811ff2 100644 --- a/library/stdarch/crates/stdsimd-test/src/lib.rs +++ b/library/stdarch/crates/stdsimd-test/src/lib.rs @@ -3,11 +3,7 @@ //! This basically just disassembles the current executable and then parses the //! output once globally and then provides the `assert` function which makes //! assertions about the disassembly of a function. - -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::missing_docs_in_private_items, clippy::print_stdout) -)] +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] extern crate assert_instr_macro; extern crate backtrace; -- cgit 1.4.1-3-g733a5 From 459afa7a592b6bc140836d0c426f854d63887c27 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 16 Apr 2019 11:01:19 +0200 Subject: Fix clippy issues --- .../stdarch/crates/assert-instr-macro/src/lib.rs | 2 +- library/stdarch/crates/core_arch/src/lib.rs | 1 - library/stdarch/crates/core_arch/src/macros.rs | 3 +-- library/stdarch/crates/core_arch/src/simd.rs | 4 ++-- library/stdarch/crates/core_arch/src/x86/adx.rs | 3 +-- library/stdarch/crates/core_arch/src/x86/avx.rs | 18 +++++++++--------- library/stdarch/crates/core_arch/src/x86/bswap.rs | 3 +-- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 10 +++------- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 3 +-- library/stdarch/crates/core_arch/src/x86/sse.rs | 10 +++++----- library/stdarch/crates/core_arch/src/x86/sse2.rs | 12 ++++++------ library/stdarch/crates/core_arch/src/x86/xsave.rs | 3 +-- .../stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/rdrand.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/xsave.rs | 2 +- .../stdarch/crates/core_arch/tests/cpu-detection.rs | 5 +---- .../stdarch/crates/std_detect/src/detect/cache.rs | 9 +++------ .../stdarch/crates/std_detect/src/detect/os/x86.rs | 2 +- .../crates/std_detect/tests/cpu-detection.rs | 5 +---- .../std_detect/tests/macro_trailing_commas.rs | 5 +---- .../stdarch/crates/stdsimd-test/src/disassembly.rs | 6 +++--- library/stdarch/crates/stdsimd-test/src/lib.rs | 4 ++-- library/stdarch/crates/stdsimd-verify/src/lib.rs | 12 ++++++------ library/stdarch/examples/hex.rs | 21 +++++++++------------ 24 files changed, 61 insertions(+), 86 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/assert-instr-macro/src/lib.rs b/library/stdarch/crates/assert-instr-macro/src/lib.rs index 4525398b759..ba33dcfda92 100644 --- a/library/stdarch/crates/assert-instr-macro/src/lib.rs +++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs @@ -47,7 +47,7 @@ pub fn assert_instr( .replace('.', "_") .replace('/', "_") .replace(':', 
"_") - .replace(|c: char| c.is_whitespace(), ""); + .replace(char::is_whitespace, ""); let assert_name = syn::Ident::new(&format!("assert_{}_{}", name, instr_str), name.span()); let shim_name = syn::Ident::new(&format!("{}_shim_{}", name, instr_str), name.span()); let mut inputs = Vec::new(); diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index ea9a6206728..440b0546dbd 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -45,7 +45,6 @@ clippy::cast_possible_truncation, clippy::cast_precision_loss, clippy::shadow_reuse, - clippy::cyclomatic_complexity, clippy::cognitive_complexity, clippy::similar_names, clippy::many_single_char_names diff --git a/library/stdarch/crates/core_arch/src/macros.rs b/library/stdarch/crates/core_arch/src/macros.rs index 74a01be7709..a636a0c0bcf 100644 --- a/library/stdarch/crates/core_arch/src/macros.rs +++ b/library/stdarch/crates/core_arch/src/macros.rs @@ -275,8 +275,7 @@ macro_rules! types { #[derive(Copy, Clone, Debug)] #[allow(non_camel_case_types)] #[repr(simd)] - #[cfg_attr(feature = "cargo-clippy", - allow(clippy::missing_inline_in_public_items))] + #[allow(clippy::missing_inline_in_public_items)] pub struct $name($($fields)*); )*) } diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 542ec5bb69c..79c61b5170d 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -9,7 +9,7 @@ macro_rules! simd_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); - #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] + #[allow(clippy::use_self)] impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { @@ -41,7 +41,7 @@ macro_rules! 
simd_m_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); - #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] + #[allow(clippy::use_self)] impl $id { #[inline] const fn bool_to_internal(x: bool) -> $ety { diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index 36b733ae4e6..f5e7d4f2fb3 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -32,8 +32,7 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { #[stable(feature = "simd_x86_adx", since = "1.33.0")] #[cfg(not(stage0))] pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { - let r = llvm_addcarryx_u32(c_in, a, b, out as *mut _ as *mut u8); - r + llvm_addcarryx_u32(c_in, a, b, out as *mut _ as *mut u8) } /// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index c00e902473c..86db1b10af8 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -1452,7 +1452,7 @@ pub unsafe fn _mm256_permute2f128_si256(a: __m256i, b: __m256i, imm8: i32) -> __ #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] +#[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } @@ -1465,7 +1465,7 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] +#[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } @@ -1478,7 +1478,7 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] +#[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } @@ -1618,7 +1618,7 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { *(mem_addr as *const __m256d) } @@ -1633,7 +1633,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { *(mem_addr as *mut __m256d) = a; } @@ -1648,7 +1648,7 @@ pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] 
#[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { *(mem_addr as *const __m256) } @@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { *(mem_addr as *mut __m256) = a; } @@ -1959,7 +1959,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { intrinsics::nontemporal_store(mem_addr as *mut __m256d, a); } @@ -1974,7 +1974,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { intrinsics::nontemporal_store(mem_addr as *mut __m256, a); } diff --git a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs index 7d51082ff49..2896781f847 100644 --- a/library/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -1,6 +1,5 @@ //! Byte swap intrinsics. - -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index 1c233e54601..c52157599c6 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -1,16 +1,12 @@ //! `cpuid` intrinsics - -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; /// Result of the `cpuid` instruction. -#[cfg_attr( - feature = "cargo-clippy", - // the derived impl of Debug for CpuidResult is not #[inline] and that's OK. - allow(clippy::missing_inline_in_public_items) -)] +#[allow(clippy::missing_inline_in_public_items)] +// ^^ the derived impl of Debug for CpuidResult is not #[inline] and that's OK. #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[stable(feature = "simd_x86", since = "1.27.0")] pub struct CpuidResult { diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index 49abc3d0497..a1252933522 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -1,8 +1,7 @@ //! RDRAND and RDSEED instructions for returning random numbers from an Intel //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. 
- -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[allow(improper_ctypes)] extern "unadjusted" { diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 03ea61c11ff..8075e266300 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -1209,7 +1209,7 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { *(p as *const __m128) } @@ -1370,7 +1370,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]); *(p as *mut __m128) = b; @@ -1402,7 +1402,7 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = a; } @@ -1446,7 +1446,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]); *(p as *mut __m128) = b; @@ -2033,7 +2033,7 @@ extern "C" { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movntps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem_addr as *mut __m128, a); } diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index 6f8fd06fd5e..e2ce40e6313 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -2529,7 +2529,7 @@ pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { *(mem_addr as *const __m128d) } @@ -2582,7 +2582,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: 
__m128d) { intrinsics::nontemporal_store(mem_addr as *mut __m128d, a); } @@ -2608,7 +2608,7 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = a; } @@ -2634,7 +2634,7 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; @@ -2648,7 +2648,7 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; @@ -2663,7 +2663,7 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [1, 0]); *(mem_addr as *mut __m128d) = b; diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index 19a9dcbb287..f707caca199 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -1,6 +1,5 @@ //! `i586`'s `xsave` and `xsaveopt` target feature intrinsics - -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index 6a94a42fc60..08bf1d2f87b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -1,6 +1,6 @@ //! Byte swap intrinsics. -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 40b781d4062..7b6d0de01f8 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -2,7 +2,7 @@ //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. 
-#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[allow(improper_ctypes)] extern "unadjusted" { diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 703dcde449f..964a24d12e2 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -1,6 +1,6 @@ //! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/tests/cpu-detection.rs b/library/stdarch/crates/core_arch/tests/cpu-detection.rs index 454176b18c4..9a7c999a18b 100644 --- a/library/stdarch/crates/core_arch/tests/cpu-detection.rs +++ b/library/stdarch/crates/core_arch/tests/cpu-detection.rs @@ -1,9 +1,6 @@ #![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug) -)] +#![allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[macro_use] diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index 422bd6817da..92bc4b58d16 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -30,7 +30,7 @@ const CACHE_CAPACITY: u32 = 63; #[derive(Copy, Clone)] pub(crate) struct Initializer(u64); -#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] +#[allow(clippy::use_self)] impl Default for Initializer { fn default() -> Self { Initializer(0) @@ -76,13 +76,10 @@ static CACHE: Cache = Cache::uninitialized(); struct Cache(AtomicU64); #[cfg(target_pointer_width = "64")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] +#[allow(clippy::use_self)] impl Cache { /// Creates an uninitialized cache. 
- #[cfg_attr( - feature = "cargo-clippy", - allow(clippy::declare_interior_mutable_const) - )] + #[allow(clippy::declare_interior_mutable_const)] const fn uninitialized() -> Self { Cache(AtomicU64::new(u64::max_value())) } diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 675601536fa..db8aa261188 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -30,7 +30,7 @@ pub fn check_for(x: Feature) -> bool { /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf -#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] +#[allow(clippy::similar_names)] fn detect_features() -> cache::Initializer { let mut value = cache::Initializer::default(); diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index 85beeee63ac..b2b8abb0102 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -1,9 +1,6 @@ #![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout) -)] +#![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( target_arch = "arm", diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs index d63da6af06a..df03e6555aa 100644 --- a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -1,9 +1,6 @@ #![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout) -)] +#![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( target_arch = "arm", diff --git a/library/stdarch/crates/stdsimd-test/src/disassembly.rs b/library/stdarch/crates/stdsimd-test/src/disassembly.rs index 3882207080b..1aee27a9e4f 100644 --- a/library/stdarch/crates/stdsimd-test/src/disassembly.rs +++ b/library/stdarch/crates/stdsimd-test/src/disassembly.rs @@ -103,7 +103,7 @@ fn parse_objdump(output: &str) -> HashMap> { && usize::from_str_radix(s, 16).is_ok() }) .skip_while(|s| *s == "lock") // skip x86-specific prefix - .map(|s| s.to_string()) + .map(std::string::ToString::to_string) .collect::>(); instructions.push(Instruction { parts }); } @@ -148,7 +148,7 @@ fn parse_otool(output: &str) -> HashMap> { let parts = instruction .split_whitespace() .skip(1) - .map(|s| s.to_string()) + .map(std::string::ToString::to_string) .collect::>(); instructions.push(Instruction { parts }); } @@ -199,7 +199,7 @@ fn parse_dumpbin(output: &str) -> HashMap> { .skip(1) .skip_while(|s| { s.len() == 2 && usize::from_str_radix(s, 16).is_ok() - }).map(|s| s.to_string()) + }).map(std::string::ToString::to_string) .skip_while(|s| *s == "lock") // skip x86-specific prefix .collect::>(); instructions.push(Instruction { parts }); diff --git a/library/stdarch/crates/stdsimd-test/src/lib.rs b/library/stdarch/crates/stdsimd-test/src/lib.rs 
index d20d3811ff2..a405412b427 100644 --- a/library/stdarch/crates/stdsimd-test/src/lib.rs +++ b/library/stdarch/crates/stdsimd-test/src/lib.rs @@ -61,10 +61,10 @@ fn normalize(symbol: &str) -> String { }; // Normalize to no leading underscore to handle platforms that may // inject extra ones in symbol names. - while ret.starts_with("_") { + while ret.starts_with('_') { ret.remove(0); } - return ret; + ret } /// Main entry point for this crate, called by the `#[assert_instr]` macro. diff --git a/library/stdarch/crates/stdsimd-verify/src/lib.rs b/library/stdarch/crates/stdsimd-verify/src/lib.rs index 284b35dc13b..e832e23d4e9 100644 --- a/library/stdarch/crates/stdsimd-verify/src/lib.rs +++ b/library/stdarch/crates/stdsimd-verify/src/lib.rs @@ -243,17 +243,17 @@ fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) { continue; } let path = file.path(); - if path.extension().and_then(|s| s.to_str()) != Some("rs") { + if path.extension().and_then(std::ffi::OsStr::to_str) != Some("rs") { continue; } - if path.file_name().and_then(|s| s.to_str()) == Some("test.rs") { + if path.file_name().and_then(std::ffi::OsStr::to_str) == Some("test.rs") { continue; } let mut contents = String::new(); File::open(&path) - .expect(&format!("can't open file at path: {}", path.display())) + .unwrap_or_else(|_| panic!("can't open file at path: {}", path.display())) .read_to_string(&mut contents) .expect("failed to read file to string"); @@ -284,8 +284,8 @@ fn find_instrs(attrs: &[syn::Attribute]) -> Vec { fn parse(content: syn::parse::ParseStream) -> syn::parse::Result { let input; parenthesized!(input in content); - drop(input.parse::()?); - drop(input.parse::()?); + let _ = input.parse::()?; + let _ = input.parse::()?; let ident = input.parse::()?; if ident != "assert_instr" { return Err(input.error("expected `assert_instr`")); @@ -358,7 +358,7 @@ struct RustcArgsRequiredConst { } impl syn::parse::Parse for RustcArgsRequiredConst { - #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_possible_truncation))] + #[allow(clippy::cast_possible_truncation)] fn parse(input: syn::parse::ParseStream) -> syn::parse::Result { let content; parenthesized!(content in input); diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs index 8b9a4cdd0d0..7589578517e 100644 --- a/library/stdarch/examples/hex.rs +++ b/library/stdarch/examples/hex.rs @@ -14,18 +14,15 @@ #![feature(stdsimd)] #![cfg_attr(test, feature(test))] -#![cfg_attr( - feature = "cargo-clippy", - allow( - clippy::result_unwrap_used, - clippy::print_stdout, - clippy::option_unwrap_used, - clippy::shadow_reuse, - clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::cast_sign_loss, - clippy::missing_docs_in_private_items - ) +#![allow( + clippy::result_unwrap_used, + clippy::print_stdout, + clippy::option_unwrap_used, + clippy::shadow_reuse, + clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, + clippy::cast_sign_loss, + clippy::missing_docs_in_private_items )] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -- cgit 1.4.1-3-g733a5 From 26d6e048cc2642a5a227439d99066d8f2ff06bc4 Mon Sep 17 00:00:00 2001 From: tyler Date: Wed, 17 Apr 2019 20:18:21 -0700 Subject: add rtm cpu feature intrinsics --- library/stdarch/crates/core_arch/src/lib.rs | 1 + library/stdarch/crates/core_arch/src/x86/mod.rs | 3 + library/stdarch/crates/core_arch/src/x86/rtm.rs | 151 +++++++++++++++++++++ .../crates/std_detect/src/detect/arch/x86.rs | 8 ++ .../stdarch/crates/std_detect/src/detect/os/x86.rs | 6 + 5 files changed, 169 
insertions(+) create mode 100644 library/stdarch/crates/core_arch/src/x86/rtm.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index f57d1da9641..d936d0cc1cd 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -32,6 +32,7 @@ wasm_target_feature, abi_unadjusted, adx_target_feature, + rtm_target_feature, external_doc )] #![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))] diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs index 5f28aa8836d..ad254f91a51 100644 --- a/library/stdarch/crates/core_arch/src/x86/mod.rs +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -565,3 +565,6 @@ pub use self::avx512ifma::*; mod bt; pub use self::bt::*; + +mod rtm; +pub use self::rtm::*; \ No newline at end of file diff --git a/library/stdarch/crates/core_arch/src/x86/rtm.rs b/library/stdarch/crates/core_arch/src/x86/rtm.rs new file mode 100644 index 00000000000..d03ba7b82b0 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/rtm.rs @@ -0,0 +1,151 @@ +#[cfg(test)] +use stdsimd_test::assert_instr; + +extern "C" { + #[link_name = "llvm.x86.xbegin"] + fn x86_xbegin() -> i32; + #[link_name = "llvm.x86.xend"] + fn x86_xend() -> (); + #[link_name = "llvm.x86.xabort"] + fn x86_xabort(imm8: i8) -> (); + #[link_name = "llvm.x86.xtest"] + fn x86_xtest() -> i32; +} + +/// Transaction successfully started. +pub const _XBEGIN_STARTED: u32 = !0; + +/// Transaction explicitly aborted with xabort. The parameter passed to xabort is available with +/// _xabort_code(status). +pub const _XABORT_EXPLICIT: u32 = 1 << 0; + +/// Transaction retry is possible. +pub const _XABORT_RETRY: u32 = 1 << 1; + +/// Transaction abort due to a memory conflict with another thread. +pub const _XABORT_CONFLICT: u32 = 1 << 2; + +/// Transaction abort due to the transaction using too much memory. +pub const _XABORT_CAPACITY: u32 = 1 << 3; + +/// Transaction abort due to a debug trap. +pub const _XABORT_DEBUG: u32 = 1 << 4; + +/// Transaction abort in a inner nested transaction. +pub const _XABORT_NESTED: u32 = 1 << 5; + +/// Specifies the start of a restricted transactional memory (RTM) code region and returns a value +/// indicating status. +/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xbegin). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xbegin))] +pub unsafe fn _xbegin() -> u32 { + x86_xbegin() as _ +} + +/// Specifies the end of a restricted transactional memory (RTM) code region. +/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xend). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xend))] +pub unsafe fn _xend() { + x86_xend() +} + +/// Forces a restricted transactional memory (RTM) region to abort. +/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xabort). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xabort))] +pub unsafe fn _xabort(imm8: u32) { + macro_rules! call { + ($imm8:expr) => { + x86_xabort($imm8) + }; + } + constify_imm8!(imm8, call) +} + +/// Queries whether the processor is executing in a transactional region identified by restricted +/// transactional memory (RTM) or hardware lock elision (HLE). 
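As an illustrative sketch (not part of the patch itself), the RTM intrinsics above typically compose into a bounded retry loop with a non-transactional fallback. The function and parameter names below are placeholders, the paths assume the `core::arch::x86_64` re-export these intrinsics land under, and callers would normally check `is_x86_feature_detected!("rtm")` before taking this path:

```rust
// Usage sketch, not patch content: retry a transaction a few times, then fall
// back to a conventional (e.g. lock-based) path.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "rtm")]
unsafe fn run_transactionally(work: fn(), fallback: fn()) {
    use core::arch::x86_64::{_xbegin, _xend, _XBEGIN_STARTED};
    for _ in 0..3 {
        let status = _xbegin();
        if status == _XBEGIN_STARTED {
            // Inside the transaction: all memory effects commit atomically at `_xend`.
            work();
            _xend();
            return;
        }
        // Aborted: `status` carries the `_XABORT_*` flags defined above; retry.
    }
    // The transaction kept aborting; take the conventional path instead.
    fallback();
}
```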
+/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xtest). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xtest))] +pub unsafe fn _xtest() -> bool { + x86_xtest() != 0 +} + +/// Retrieves the parameter passed to [`_xabort`] when [`_xbegin`]'s status has the `_XABORT_EXPLICIT` flag set. +#[inline] +pub fn _xabort_code(status: u32) -> u32 { + (status >> 24) & 0xFF +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + + #[test] + fn test_xbegin_xend() { + unsafe { + let mut x = 0; + for _ in 0..10 { + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + x += 1; + rtm::_xend(); + assert_eq!(x, 1); + break + } + assert_eq!(x, 0); + } + } + } + + #[test] + fn test_xabort() { + unsafe { + // aborting with outside a transactional region does nothing + _xabort(0); + + for abort_code in 0..10 { + let mut x = 0; + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + x += 1; + rtm::_xabort(abort_code); + } else if code & _XABORT_EXPLICIT != 0 { + let test_abort_code = rtm::_xabort_code(code); + assert_eq!(test_abort_code, abort_code); + } + assert_eq!(x, 0); + } + } + } + + #[test] + fn test_xtest() { + unsafe { + assert_eq!(_xtest(), false); + + for _ in 0..10 { + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + let in_tx = _xtest(); + rtm::_xend(); + + // putting the assert inside the transaction would abort the transaction on fail + // without any output/panic/etc + assert_eq!(in_tx, true); + break + } + } + } + } +} \ No newline at end of file diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 580d7111bdd..45f2d5bfc87 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -74,6 +74,8 @@ /// * `"xsaveopt"` /// * `"xsaves"` /// * `"xsavec"` +/// * `"adx"` +/// * `"rtm"` /// /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] @@ -233,6 +235,10 @@ macro_rules! 
is_x86_feature_detected { cfg!(target_feature = "adx") || $crate::detect::check_for( $crate::detect::Feature::adx) }; + ("rtm") => { + cfg!(target_feature = "rtm") || $crate::detect::check_for( + $crate::detect::Feature::rtm) + }; ($t:tt,) => { is_x86_feature_detected!($t); }; @@ -330,4 +336,6 @@ pub enum Feature { cmpxchg16b, /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) adx, + /// RTM, Intel (Restricted Transactional Memory) + rtm, } diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index db8aa261188..ab0622106c8 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -123,6 +123,7 @@ fn detect_features() -> cache::Initializer { enable(proc_info_ecx, 30, Feature::rdrand); enable(extended_features_ebx, 18, Feature::rdseed); enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); enable(proc_info_edx, 4, Feature::tsc); enable(proc_info_edx, 23, Feature::mmx); enable(proc_info_edx, 24, Feature::fxsr); @@ -290,6 +291,7 @@ mod tests { println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); println!("adx: {:?}", is_x86_feature_detected!("adx")); + println!("rtm: {:?}", is_x86_feature_detected!("rtm")); } #[test] @@ -354,5 +356,9 @@ mod tests { is_x86_feature_detected!("adx"), information.adx(), ); + assert_eq!( + is_x86_feature_detected!("rtm"), + information.rtm(), + ); } } -- cgit 1.4.1-3-g733a5 From d31cc0b09e14f33e888f1bb83bd88c6147ba6fcc Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 23 Apr 2019 10:10:41 +0200 Subject: Add runtime feature detection for F16C --- library/stdarch/crates/core_arch/src/x86/f16c.rs | 5 +++-- library/stdarch/crates/core_arch/tests/cpu-detection.rs | 1 + library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 7 +++++++ library/stdarch/crates/std_detect/src/detect/os/x86.rs | 3 ++- library/stdarch/crates/std_detect/tests/cpu-detection.rs | 1 + 5 files changed, 14 insertions(+), 3 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs index 496e3a70671..597d86b2d04 100644 --- a/library/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -1,5 +1,6 @@ -//! F16C intrinsics: -//! https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769 +//! [F16C intrinsics]. +//! +//! 
[F16C intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769 use crate::{ core_arch::{simd::*, x86::*}, diff --git a/library/stdarch/crates/core_arch/tests/cpu-detection.rs b/library/stdarch/crates/core_arch/tests/cpu-detection.rs index 9a7c999a18b..321f24e9fc2 100644 --- a/library/stdarch/crates/core_arch/tests/cpu-detection.rs +++ b/library/stdarch/crates/core_arch/tests/cpu-detection.rs @@ -31,6 +31,7 @@ fn x86_all() { "avx512_vpopcntdq {:?}", is_x86_feature_detected!("avx512vpopcntdq") ); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("abm: {:?}", is_x86_feature_detected!("abm")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 45f2d5bfc87..da14ce5cf0f 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -62,6 +62,7 @@ /// * `"avx512ifma"` /// * `"avx512vbmi"` /// * `"avx512vpopcntdq"` +/// * `"f16c"` /// * `"fma"` /// * `"bmi1"` /// * `"bmi2"` @@ -179,6 +180,10 @@ macro_rules! is_x86_feature_detected { cfg!(target_feature = "avx512vpopcntdq") || $crate::detect::check_for( $crate::detect::Feature::avx512_vpopcntdq) }; + ("f16c") => { + cfg!(target_feature = "avx512f") || $crate::detect::check_for( + $crate::detect::Feature::f16c) + }; ("fma") => { cfg!(target_feature = "fma") || $crate::detect::check_for( $crate::detect::Feature::fma) @@ -309,6 +314,8 @@ pub enum Feature { /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and /// Quadword) avx512_vpopcntdq, + /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) + f16c, /// FMA (Fused Multiply Add) fma, /// BMI1 (Bit Manipulation Instructions 1) diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index ab0622106c8..e543d301c79 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -113,13 +113,14 @@ fn detect_features() -> cache::Initializer { }; enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); enable(proc_info_ecx, 9, Feature::ssse3); enable(proc_info_ecx, 13, Feature::cmpxchg16b); enable(proc_info_ecx, 19, Feature::sse4_1); enable(proc_info_ecx, 20, Feature::sse4_2); enable(proc_info_ecx, 23, Feature::popcnt); enable(proc_info_ecx, 25, Feature::aes); - enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 29, Feature::f16c); enable(proc_info_ecx, 30, Feature::rdrand); enable(extended_features_ebx, 18, Feature::rdseed); enable(extended_features_ebx, 19, Feature::adx); diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index b2b8abb0102..0aae39e2947 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -87,6 +87,7 @@ fn x86_all() { "avx512_vpopcntdq {:?}", is_x86_feature_detected!("avx512vpopcntdq") ); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("bmi1: {:?}", is_x86_feature_detected!("bmi1")); println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); -- cgit 1.4.1-3-g733a5 From 6d59dc14abd82281e756d67636aeaac4771eb3a9 Mon Sep 17 
00:00:00 2001 From: gnzlbg Date: Wed, 24 Apr 2019 16:24:24 +0200 Subject: Update f16c intrinsics to use the f16c target feature --- library/stdarch/crates/core_arch/src/x86/f16c.rs | 12 ++++++------ library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 2 +- library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs | 7 +++---- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs index 597d86b2d04..195485914b3 100644 --- a/library/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -27,7 +27,7 @@ extern "unadjusted" { /// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide /// vector. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[cfg_attr(test, assert_instr("vcvtph2ps"))] pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { transmute(llvm_vcvtph2ps_128(transmute(a))) @@ -36,7 +36,7 @@ pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector /// `a` into 8 x 32-bit float values stored in a 256-bit wide vector. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[cfg_attr(test, assert_instr("vcvtph2ps"))] pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { transmute(llvm_vcvtph2ps_256(transmute(a))) @@ -70,7 +70,7 @@ macro_rules! dispatch_rounding { /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[rustc_args_required_const(1)] #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))] pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i { @@ -94,7 +94,7 @@ pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i { /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[rustc_args_required_const(1)] #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))] pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i { @@ -112,7 +112,7 @@ mod tests { use crate::{core_arch::x86::*, mem::transmute}; use stdsimd_test::simd_test; - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "f16c")] unsafe fn test_mm_cvtph_ps() { let array = [1_f32, 2_f32, 3_f32, 4_f32]; let float_vec: __m128 = transmute(array); @@ -122,7 +122,7 @@ mod tests { assert_eq!(result, array); } - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "f16c")] unsafe fn test_mm256_cvtph_ps() { let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32]; let float_vec: __m256 = transmute(array); diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index da14ce5cf0f..50d5cfa87c4 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -181,7 +181,7 @@ macro_rules! 
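As a usage sketch (not part of the patch), the conversion intrinsics updated above can round-trip `f32` data through `binary16` once run-time detection reports F16C support. The two-argument `_mm_cvtps_ph` call and the rounding flags follow the signatures shown in this commit; the function name is a placeholder:

```rust
// Usage sketch, not patch content: guarded f32 -> f16 -> f32 round trip.
#[cfg(target_arch = "x86_64")]
fn round_trip_f16(values: [f32; 4]) -> [f32; 4] {
    if is_x86_feature_detected!("f16c") {
        unsafe {
            use core::arch::x86_64::*;
            let v = _mm_loadu_ps(values.as_ptr());
            // Round to nearest and suppress exceptions (see the rounding-mode list above).
            let packed = _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
            let unpacked = _mm_cvtph_ps(packed);
            let mut out = [0.0_f32; 4];
            _mm_storeu_ps(out.as_mut_ptr(), unpacked);
            return out;
        }
    }
    // No F16C at run time: this sketch just returns the input unchanged.
    values
}
```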
is_x86_feature_detected { $crate::detect::Feature::avx512_vpopcntdq) }; ("f16c") => { - cfg!(target_feature = "avx512f") || $crate::detect::check_for( + cfg!(target_feature = "f16c") || $crate::detect::check_for( $crate::detect::Feature::f16c) }; ("fma") => { diff --git a/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs b/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs index 49799a5854a..fe6d8014400 100644 --- a/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs +++ b/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs @@ -299,10 +299,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { // it "avx512ifma". "avx512ifma52" => String::from("avx512ifma"), // See: https://github.com/rust-lang-nursery/stdsimd/issues/738 - // FIXME: we need to fix "fp16c" to "f16c" here. Since - // https://github.com/rust-lang/rust/pull/60191 is not merged, - // we temporarily map it to "avx512f". - "fp16c" => String::from("avx512f"), + // The intrinsics guide calls `f16c` `fp16c` in disagreement with + // Intel's architecture manuals. + "fp16c" => String::from("f16c"), _ => cpuid, }; let fixed_cpuid = fixup_cpuid(cpuid); -- cgit 1.4.1-3-g733a5 From a62067658ddcdbd085be256e3800cabd267fc79b Mon Sep 17 00:00:00 2001 From: miki Date: Fri, 26 Apr 2019 17:16:12 +0200 Subject: Add std_detect for FreeBSD armv6, armv7 and powerpc64 --- .../crates/std_detect/src/detect/os/freebsd/arm.rs | 27 +++++++ .../std_detect/src/detect/os/freebsd/auxvec.rs | 86 ++++++++++++++++++++++ .../crates/std_detect/src/detect/os/freebsd/mod.rs | 8 ++ .../std_detect/src/detect/os/freebsd/powerpc.rs | 27 +++++++ 4 files changed, 148 insertions(+) create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 00000000000..e13847dcbd8 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,27 @@ +//! Run-time feature detection for ARM on FreeBSD + +use crate::detect::{Feature, cache}; +use super::{auxvec}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & 0x00001000 != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & 0x00000002 != 0); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 00000000000..fd2bd500bfa --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,86 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr(any(target_arch = "arm", target_arch = "powerpc64"), allow(dead_code))] + +/// Key to access the CPU Hardware capabilities bitfield. 
+pub(crate) const AT_HWCAP: usize = 25; +/// Key to access the CPU Hardware capabilities 2 bitfield. +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common.h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result { + if let Ok(hwcap) = archauxv(AT_HWCAP) { + if let Ok(hwcap2) = archauxv(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: usize) -> Result { + use mem; + + #[derive (Copy, Clone)] + #[repr(C)] + pub struct Elf_Auxinfo { + pub a_type: usize, + pub a_un: unnamed, + } + #[derive (Copy, Clone)] + #[repr(C)] + pub union unnamed { + pub a_val: libc::c_long, + pub a_ptr: *mut libc::c_void, + pub a_fcn: Option ()>, + } + + let mut auxv: [Elf_Auxinfo; 27] = + [Elf_Auxinfo{a_type: 0, a_un: unnamed{a_val: 0,},}; 27]; + + let mut len: libc::c_uint = mem::size_of_val(&auxv) as libc::c_uint; + + unsafe { + let mut mib = [libc::CTL_KERN, libc::KERN_PROC, libc::KERN_PROC_AUXV, libc::getpid()]; + + let ret = libc::sysctl(mib.as_mut_ptr(), + mib.len() as u32, + &mut auxv as *mut _ as *mut _, + &mut len as *mut _ as *mut _, + 0 as *mut libc::c_void, + 0, + ); + + if ret != -1 { + for i in 0..auxv.len() { + if auxv[i].a_type == key { + return Ok(auxv[i].a_un.a_val as usize); + } + } + } + } + return Ok(0); +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs index 952b4baf0fd..1a5338a3555 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -1,9 +1,17 @@ //! Run-time feature detection on FreeBSD +mod auxvec; + cfg_if! { if #[cfg(target_arch = "aarch64")] { mod aarch64; pub use self::aarch64::check_for; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub use self::arm::check_for; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub use self::powerpc::check_for; } else { use crate::arch::detect::Feature; /// Performs run-time feature detection. diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 00000000000..c7f761d4d60 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,27 @@ +//! Run-time feature detection for PowerPC on FreeBSD. + +use crate::detect::{Feature, cache}; +use super::{auxvec}; + +/// Performs run-time feature detection. 
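For readability (an aside, not patch content), the hexadecimal masks tested by the ARM and PowerPC detectors in this commit correspond to well-known `AT_HWCAP`/`AT_HWCAP2` bit positions. The constant names below are local to this sketch rather than taken from FreeBSD's headers:

```rust
// Aside, not patch content: the magic masks from this commit's detectors,
// bound to descriptive, locally defined names.
const ARM_HWCAP_NEON: usize = 0x0000_1000; // AT_HWCAP bit 12: NEON/ASIMD
const ARM_HWCAP2_PMULL: usize = 0x0000_0002; // AT_HWCAP2 bit 1: PMULL
const PPC_HWCAP_ALTIVEC: usize = 0x1000_0000; // AT_HWCAP: AltiVec
const PPC_HWCAP_VSX: usize = 0x0000_0080; // AT_HWCAP: VSX
const PPC_HWCAP2_ARCH_2_07: usize = 0x8000_0000; // AT_HWCAP2: ISA 2.07 (POWER8)

// POWER8 is an AT_HWCAP2 bit, so it has to be tested against `hwcap2` rather
// than `hwcap` (compare the follow-up fix to the Linux detector further below).
fn has_power8(hwcap2: usize) -> bool {
    hwcap2 & PPC_HWCAP2_ARCH_2_07 != 0
}
```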
+#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} -- cgit 1.4.1-3-g733a5 From a2b98a167e263ca487c4eb76d1c19f1b69a09348 Mon Sep 17 00:00:00 2001 From: MikaelUrankar Date: Sat, 11 May 2019 15:23:35 +0200 Subject: Fix detection of power8 The power8 feature is defined in hwcap2 --- library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs index 0022a7db983..1c08a58443d 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -27,7 +27,7 @@ fn detect_features() -> cache::Initializer { // index of the bit to test like in ARM and Aarch64) enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); - enable_feature(&mut value, Feature::power8, auxv.hwcap & 0x80000000 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); return value; } -- cgit 1.4.1-3-g733a5 From 491b7c0c53d8d24db8062f1d1154795090894dae Mon Sep 17 00:00:00 2001 From: Tobias Kortkamp Date: Thu, 23 May 2019 03:43:54 +0000 Subject: Fix build of auxvec.rs on FreeBSD/powerpc64 ``` error[E0432]: unresolved import `mem` --> src/libstd/../stdsimd/crates/std_detect/src/detect/os/freebsd/auxvec.rs:45:9 | 45 | use mem; | ^^^ no `mem` external crate error: aborting due to previous error For more information about this error, try `rustc --explain E0432`. error: Could not compile `std`. ``` Tested by @pkubaj in https://reviews.freebsd.org/D20332 --- library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs index fd2bd500bfa..a2bac767601 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -42,7 +42,7 @@ pub(crate) fn auxv() -> Result { /// Tries to read the `key` from the auxiliary vector. fn archauxv(key: usize) -> Result { - use mem; + use crate::mem; #[derive (Copy, Clone)] #[repr(C)] -- cgit 1.4.1-3-g733a5 From 6369621e7981c38f86e5e1e63382805416aefb26 Mon Sep 17 00:00:00 2001 From: hygonsoc Date: Sat, 25 May 2019 21:36:53 +0800 Subject: add Hygon Dhyana CPU Vendor ID("HygonGenuine") checking As Hygon Dhyana originates from AMD technology and shares most of the architecture with AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series number(Family 18h). for CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD family 17h. 
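For illustration (not part of this patch), the vendor ID that the check below compares against is the 12-byte string returned by CPUID leaf 0, assembled from EBX, EDX and ECX in that order; the helper names here are placeholders:

```rust
// Illustration only, not patch content: reading a CPUID vendor string such as
// "AuthenticAMD" or "HygonGenuine".
#[cfg(target_arch = "x86_64")]
fn vendor_id() -> [u8; 12] {
    use core::arch::x86_64::__cpuid;
    let leaf0 = unsafe { __cpuid(0) };
    let mut id = [0_u8; 12];
    id[0..4].copy_from_slice(&leaf0.ebx.to_le_bytes());
    id[4..8].copy_from_slice(&leaf0.edx.to_le_bytes());
    id[8..12].copy_from_slice(&leaf0.ecx.to_le_bytes());
    id
}

#[cfg(target_arch = "x86_64")]
fn treat_as_amd_family_17h() -> bool {
    // Hygon Dhyana reports "HygonGenuine" but uses AMD's family-17h feature bits.
    let id = vendor_id();
    &id == b"AuthenticAMD" || &id == b"HygonGenuine"
}
```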
AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. Related Hygon kernel patch can be found on http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn --- library/stdarch/crates/std_detect/src/detect/os/x86.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index e543d301c79..9257b8a4be6 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -234,8 +234,18 @@ fn detect_features() -> cache::Initializer { // The `is_x86_feature_detected!("lzcnt")` macro then // internally maps to Feature::abm. enable(extended_proc_info_ecx, 5, Feature::abm); - if vendor_id == *b"AuthenticAMD" { - // These features are only available on AMD CPUs: + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: enable(extended_proc_info_ecx, 6, Feature::sse4a); enable(extended_proc_info_ecx, 21, Feature::tbm); } -- cgit 1.4.1-3-g733a5 From 686b813f5d8ac504fb2f254731d0d681147d415e Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 8 Jul 2019 23:21:37 +0200 Subject: Update repo name --- library/stdarch/.travis.yml | 16 +- library/stdarch/CONTRIBUTING.md | 26 +- library/stdarch/Cargo.toml | 2 +- library/stdarch/QUESTIONS.md | 4 +- library/stdarch/README.md | 8 +- library/stdarch/ci/run-docker.sh | 12 +- library/stdarch/ci/run.sh | 14 +- .../stdarch/crates/assert-instr-macro/src/lib.rs | 10 +- library/stdarch/crates/core_arch/Cargo.toml | 14 +- library/stdarch/crates/core_arch/README.md | 22 +- .../stdarch/crates/core_arch/src/aarch64/crc.rs | 4 +- .../stdarch/crates/core_arch/src/aarch64/crypto.rs | 4 +- .../stdarch/crates/core_arch/src/aarch64/mod.rs | 2 +- .../stdarch/crates/core_arch/src/aarch64/neon.rs | 4 +- library/stdarch/crates/core_arch/src/aarch64/v8.rs | 2 +- library/stdarch/crates/core_arch/src/acle/dsp.rs | 4 +- .../crates/core_arch/src/acle/registers/aarch32.rs | 2 +- .../stdarch/crates/core_arch/src/acle/simd32.rs | 4 +- .../stdarch/crates/core_arch/src/arm/armclang.rs | 2 +- library/stdarch/crates/core_arch/src/arm/mod.rs | 2 +- library/stdarch/crates/core_arch/src/arm/neon.rs | 4 +- .../crates/core_arch/src/arm/table_lookup_tests.rs | 2 +- library/stdarch/crates/core_arch/src/arm/v6.rs | 2 +- library/stdarch/crates/core_arch/src/arm/v7.rs | 8 +- library/stdarch/crates/core_arch/src/lib.rs | 3 +- library/stdarch/crates/core_arch/src/mips/mod.rs | 2 +- library/stdarch/crates/core_arch/src/mips/msa.rs | 4 +- library/stdarch/crates/core_arch/src/mod.rs | 2 +- .../crates/core_arch/src/powerpc/altivec.rs | 4 +- .../stdarch/crates/core_arch/src/powerpc/mod.rs | 2 +- .../stdarch/crates/core_arch/src/powerpc/vsx.rs | 4 +- 
.../stdarch/crates/core_arch/src/wasm32/atomic.rs | 2 +- .../stdarch/crates/core_arch/src/wasm32/memory.rs | 2 +- library/stdarch/crates/core_arch/src/wasm32/mod.rs | 2 +- .../stdarch/crates/core_arch/src/wasm32/simd128.rs | 2 +- library/stdarch/crates/core_arch/src/x86/abm.rs | 4 +- library/stdarch/crates/core_arch/src/x86/adx.rs | 4 +- library/stdarch/crates/core_arch/src/x86/aes.rs | 4 +- library/stdarch/crates/core_arch/src/x86/avx.rs | 8 +- library/stdarch/crates/core_arch/src/x86/avx2.rs | 4 +- .../stdarch/crates/core_arch/src/x86/avx512f.rs | 4 +- .../stdarch/crates/core_arch/src/x86/avx512ifma.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bmi1.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bmi2.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86/bt.rs | 2 +- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 +- library/stdarch/crates/core_arch/src/x86/f16c.rs | 4 +- library/stdarch/crates/core_arch/src/x86/fma.rs | 4 +- library/stdarch/crates/core_arch/src/x86/fxsr.rs | 4 +- library/stdarch/crates/core_arch/src/x86/mmx.rs | 4 +- library/stdarch/crates/core_arch/src/x86/mod.rs | 10 +- .../stdarch/crates/core_arch/src/x86/pclmulqdq.rs | 4 +- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 2 +- library/stdarch/crates/core_arch/src/x86/rdtsc.rs | 4 +- library/stdarch/crates/core_arch/src/x86/rtm.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sha.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse.rs | 6 +- library/stdarch/crates/core_arch/src/x86/sse2.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse3.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse41.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse42.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse4a.rs | 4 +- library/stdarch/crates/core_arch/src/x86/ssse3.rs | 4 +- library/stdarch/crates/core_arch/src/x86/tbm.rs | 4 +- library/stdarch/crates/core_arch/src/x86/xsave.rs | 12 +- library/stdarch/crates/core_arch/src/x86_64/abm.rs | 4 +- library/stdarch/crates/core_arch/src/x86_64/adx.rs | 4 +- library/stdarch/crates/core_arch/src/x86_64/avx.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/avx2.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/bmi.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/bmi2.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/bt.rs | 2 +- .../crates/core_arch/src/x86_64/cmpxchg16b.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/fxsr.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/rdrand.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/sse.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/sse2.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/sse41.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/sse42.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/xsave.rs | 8 +- .../crates/core_arch/tests/cpu-detection.rs | 1 - library/stdarch/crates/simd-test-macro/src/lib.rs | 2 +- library/stdarch/crates/std_detect/Cargo.toml | 12 +- library/stdarch/crates/std_detect/README.md | 6 +- library/stdarch/crates/std_detect/src/lib.rs | 1 - .../crates/std_detect/tests/cpu-detection.rs | 1 - .../std_detect/tests/macro_trailing_commas.rs | 1 - library/stdarch/crates/stdarch-test/Cargo.toml | 20 + .../stdarch/crates/stdarch-test/src/disassembly.rs | 182 + library/stdarch/crates/stdarch-test/src/lib.rs | 178 + library/stdarch/crates/stdarch-test/src/wasm.rs | 88 + .../stdarch/crates/stdarch-verify/.gitattributes | 1 + 
library/stdarch/crates/stdarch-verify/Cargo.toml | 19 + .../crates/stdarch-verify/arm-intrinsics.html | 93399 ++++++++++++ library/stdarch/crates/stdarch-verify/build.rs | 28 + library/stdarch/crates/stdarch-verify/mips-msa.h | 707 + library/stdarch/crates/stdarch-verify/src/lib.rs | 387 + library/stdarch/crates/stdarch-verify/tests/arm.rs | 592 + .../stdarch/crates/stdarch-verify/tests/mips.rs | 335 + .../crates/stdarch-verify/tests/x86-intel.rs | 537 + .../stdarch/crates/stdarch-verify/x86-intel.xml | 134861 ++++++++++++++++++ library/stdarch/crates/stdsimd-test/Cargo.toml | 20 - .../stdarch/crates/stdsimd-test/src/disassembly.rs | 182 - library/stdarch/crates/stdsimd-test/src/lib.rs | 178 - library/stdarch/crates/stdsimd-test/src/wasm.rs | 88 - .../stdarch/crates/stdsimd-verify/.gitattributes | 1 - library/stdarch/crates/stdsimd-verify/Cargo.toml | 19 - .../crates/stdsimd-verify/arm-intrinsics.html | 93399 ------------ library/stdarch/crates/stdsimd-verify/build.rs | 28 - library/stdarch/crates/stdsimd-verify/mips-msa.h | 707 - library/stdarch/crates/stdsimd-verify/src/lib.rs | 387 - library/stdarch/crates/stdsimd-verify/tests/arm.rs | 592 - .../stdarch/crates/stdsimd-verify/tests/mips.rs | 335 - .../crates/stdsimd-verify/tests/x86-intel.rs | 537 - .../stdarch/crates/stdsimd-verify/x86-intel.xml | 134861 ------------------ library/stdarch/examples/Cargo.toml | 4 +- library/stdarch/examples/hex.rs | 4 +- library/stdarch/vendor.yml | 4 +- 120 files changed, 231547 insertions(+), 231552 deletions(-) create mode 100644 library/stdarch/crates/stdarch-test/Cargo.toml create mode 100644 library/stdarch/crates/stdarch-test/src/disassembly.rs create mode 100644 library/stdarch/crates/stdarch-test/src/lib.rs create mode 100644 library/stdarch/crates/stdarch-test/src/wasm.rs create mode 100644 library/stdarch/crates/stdarch-verify/.gitattributes create mode 100644 library/stdarch/crates/stdarch-verify/Cargo.toml create mode 100644 library/stdarch/crates/stdarch-verify/arm-intrinsics.html create mode 100644 library/stdarch/crates/stdarch-verify/build.rs create mode 100644 library/stdarch/crates/stdarch-verify/mips-msa.h create mode 100644 library/stdarch/crates/stdarch-verify/src/lib.rs create mode 100644 library/stdarch/crates/stdarch-verify/tests/arm.rs create mode 100644 library/stdarch/crates/stdarch-verify/tests/mips.rs create mode 100644 library/stdarch/crates/stdarch-verify/tests/x86-intel.rs create mode 100644 library/stdarch/crates/stdarch-verify/x86-intel.xml delete mode 100644 library/stdarch/crates/stdsimd-test/Cargo.toml delete mode 100644 library/stdarch/crates/stdsimd-test/src/disassembly.rs delete mode 100644 library/stdarch/crates/stdsimd-test/src/lib.rs delete mode 100644 library/stdarch/crates/stdsimd-test/src/wasm.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/.gitattributes delete mode 100644 library/stdarch/crates/stdsimd-verify/Cargo.toml delete mode 100644 library/stdarch/crates/stdsimd-verify/arm-intrinsics.html delete mode 100644 library/stdarch/crates/stdsimd-verify/build.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/mips-msa.h delete mode 100644 library/stdarch/crates/stdsimd-verify/src/lib.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/tests/arm.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/tests/mips.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/x86-intel.xml (limited to 'library/stdarch/crates/std_detect/src') diff --git 
a/library/stdarch/.travis.yml b/library/stdarch/.travis.yml index 171771b0735..93ad469e817 100644 --- a/library/stdarch/.travis.yml +++ b/library/stdarch/.travis.yml @@ -13,14 +13,14 @@ matrix: env: TARGET=x86_64-unknown-linux-gnu install: true - name: "x86_64-unknown-linux-gnu-emulated (runs all assert_instr tests)" - env: TARGET=x86_64-unknown-linux-gnu-emulated STDSIMD_TEST_EVERYTHING=1 RUSTFLAGS="--cfg stdsimd_intel_sde" + env: TARGET=x86_64-unknown-linux-gnu-emulated STDARCH_TEST_EVERYTHING=1 RUSTFLAGS="--cfg stdarch_intel_sde" install: true - name: "x86_64-linux-android - no assert_instr" - env: TARGET=x86_64-linux-android STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=x86_64-linux-android STDARCH_DISABLE_ASSERT_INSTR=1 - name: "arm-unknown-linux-gnueabihf" env: TARGET=arm-unknown-linux-gnueabihf - name: "arm-linux-androideabi - no assert_instr" - env: TARGET=arm-linux-androideabi STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=arm-linux-androideabi STDARCH_DISABLE_ASSERT_INSTR=1 - name: "armv7-unknown-linux-gnueabihf - NEON" env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" - name: "aarch64-unknown-linux-gnu" @@ -34,13 +34,13 @@ matrix: - name: "mips64el-unknown-linux-gnuabi64 - build-only" env: TARGET=mips64el-unknown-linux-gnuabi64 NORUN=1 - name: "aarch64-linux-android - no assert_instr" - env: TARGET=aarch64-linux-android STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=aarch64-linux-android STDARCH_DISABLE_ASSERT_INSTR=1 - name: "powerpc-unknown-linux-gnu - no assert_instr" - env: TARGET=powerpc-unknown-linux-gnu STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=powerpc-unknown-linux-gnu STDARCH_DISABLE_ASSERT_INSTR=1 - name: "powerpc64-unknown-linux-gnu - no assert_instr" - env: TARGET=powerpc64-unknown-linux-gnu STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=powerpc64-unknown-linux-gnu STDARCH_DISABLE_ASSERT_INSTR=1 - name: "powerpc64le-unknown-linux-gnu - no assert_instr" - env: TARGET=powerpc64le-unknown-linux-gnu STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=powerpc64le-unknown-linux-gnu STDARCH_DISABLE_ASSERT_INSTR=1 - name: "s390x-unknown-linux-gnu" env: TARGET=s390x-unknown-linux-gnu - name: "i686-apple-darwin" @@ -90,7 +90,7 @@ matrix: install: true script: ci/dox.sh - name: "Automatic verification: x86 / x86_64 / arm / aarch64 / mips*" - script: cargo test --manifest-path crates/stdsimd-verify/Cargo.toml + script: cargo test --manifest-path crates/stdarch-verify/Cargo.toml install: true - name: "rustfmt" install: true diff --git a/library/stdarch/CONTRIBUTING.md b/library/stdarch/CONTRIBUTING.md index 47a3d6ba665..85b7bb474fc 100644 --- a/library/stdarch/CONTRIBUTING.md +++ b/library/stdarch/CONTRIBUTING.md @@ -1,11 +1,11 @@ -# Contributing to stdsimd +# Contributing to stdarch -The `stdsimd` crate is more than willing to accept contributions! First you'll +The `stdarch` crate is more than willing to accept contributions! First you'll probably want to check out the repository and make sure that tests pass for you: ``` -$ git clone https://github.com/rust-lang-nursery/stdsimd -$ cd stdsimd +$ git clone https://github.com/rust-lang/stdarch +$ cd stdarch $ cargo +nightly test ``` @@ -29,7 +29,7 @@ around! Feel free to ping either @BurntSushi or @alexcrichton with questions. 
[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd -# How to write examples for stdsimd intrinsics +# How to write examples for stdarch intrinsics There are a few features that must be enabled for the given intrinsic to work properly and the example must only be run by `cargo test --doc` when the feature @@ -44,9 +44,9 @@ to ensure your example works as expected. /// # // We need target_feature for the intrinsic to work /// # #![feature(target_feature)] /// # -/// # // rustdoc by default uses `extern crate stdsimd`, but we need the +/// # // rustdoc by default uses `extern crate stdarch`, but we need the /// # // `#[macro_use]` -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate stdarch; /// # /// # // The real main function /// # fn main() { @@ -69,12 +69,12 @@ to ensure your example works as expected. If some of the above syntax does not look familiar, the [Documentation as tests] section of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping -to improve the documentation of `stdsimd`! +to improve the documentation of `stdarch`! -[new]: https://github.com/rust-lang-nursery/stdsimd/issues/new -[issues]: https://github.com/rust-lang-nursery/stdsimd/issues -[help]: https://github.com/rust-lang-nursery/stdsimd/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22 -[impl]: https://github.com/rust-lang-nursery/stdsimd/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period -[vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40 +[new]: https://github.com/rust-lang/stdarch/issues/new +[issues]: https://github.com/rust-lang/stdarch/issues +[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22 +[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period +[vendor]: https://github.com/rust-lang/stdarch/issues/40 [Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests [Rust Book]: https://doc.rust-lang.org/book/first-edition diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index e97bf7f3632..7b4c5ead8a8 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -1,6 +1,6 @@ [workspace] members = [ - "crates/stdsimd-verify", + "crates/stdarch-verify", "crates/core_arch", "crates/std_detect", "examples/" diff --git a/library/stdarch/QUESTIONS.md b/library/stdarch/QUESTIONS.md index 3c6492556ca..bc04ea051ca 100644 --- a/library/stdarch/QUESTIONS.md +++ b/library/stdarch/QUESTIONS.md @@ -2,10 +2,10 @@ This program emits `psllw` instead of `pslliw` instruction. `pslliw` should be usable here since `imm8` is a constant: ```rust -extern crate stdsimd; +extern crate stdarch; use std::env; -use stdsimd as s; +use stdarch as s; fn main() { let imm8: i32 = env::args().nth(1).unwrap().parse().unwrap(); diff --git a/library/stdarch/README.md b/library/stdarch/README.md index 660295b0992..699d076a386 100644 --- a/library/stdarch/README.md +++ b/library/stdarch/README.md @@ -1,4 +1,4 @@ -stdsimd - Rust's standard library SIMD components +stdarch - Rust's standard library SIMD components ======= [![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] @@ -29,9 +29,9 @@ To do a release of the `core_arch` and `std_detect` crates, https://github.com/rust-lang/cargo/issues/4242), * publish the crates. 
-[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd -[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master -[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master +[travis]: https://travis-ci.com/rust-lang/stdarch +[Travis-CI Status]: https://travis-ci.com/rust-lang/stdarch.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdarch/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg [core_arch_crate_link]: https://crates.io/crates/core_arch diff --git a/library/stdarch/ci/run-docker.sh b/library/stdarch/ci/run-docker.sh index 3db8e49ad90..48989f225d8 100755 --- a/library/stdarch/ci/run-docker.sh +++ b/library/stdarch/ci/run-docker.sh @@ -8,7 +8,7 @@ set -ex run() { target=$(echo "${1}" | sed 's/-emulated//') echo "Building docker container for TARGET=${1}" - docker build -t stdsimd -f "ci/docker/${1}/Dockerfile" ci/ + docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/ mkdir -p target echo "Running docker" # shellcheck disable=SC2016 @@ -20,18 +20,18 @@ run() { --env CARGO_HOME=/cargo-h \ --volume "$(rustc --print sysroot)":/rust:ro \ --env TARGET="${target}" \ - --env STDSIMD_TEST_EVERYTHING \ - --env STDSIMD_ASSERT_INSTR_IGNORE \ - --env STDSIMD_DISABLE_ASSERT_INSTR \ + --env STDARCH_TEST_EVERYTHING \ + --env STDARCH_ASSERT_INSTR_IGNORE \ + --env STDARCH_DISABLE_ASSERT_INSTR \ --env NOSTD \ --env NORUN \ --env RUSTFLAGS \ - --env STDSIMD_TEST_NORUN \ + --env STDARCH_TEST_NORUN \ --volume "$(pwd)":/checkout:ro \ --volume "$(pwd)"/target:/checkout/target \ --workdir /checkout \ --privileged \ - stdsimd \ + stdarch \ bash \ -c 'PATH=/rust/bin:$PATH exec ci/run.sh' } diff --git a/library/stdarch/ci/run.sh b/library/stdarch/ci/run.sh index bc60edd5b19..4fd8ba0ec83 100755 --- a/library/stdarch/ci/run.sh +++ b/library/stdarch/ci/run.sh @@ -10,7 +10,7 @@ set -ex #export RUST_TEST_NOCAPTURE=1 #export RUST_TEST_THREADS=1 -RUSTFLAGS="$RUSTFLAGS --cfg stdsimd_strict" +RUSTFLAGS="$RUSTFLAGS --cfg stdarch_strict" case ${TARGET} in # On 32-bit use a static relocation model which avoids some extra @@ -33,8 +33,8 @@ esac echo "RUSTFLAGS=${RUSTFLAGS}" echo "FEATURES=${FEATURES}" echo "OBJDUMP=${OBJDUMP}" -echo "STDSIMD_DISABLE_ASSERT_INSTR=${STDSIMD_DISABLE_ASSERT_INSTR}" -echo "STDSIMD_TEST_EVERYTHING=${STDSIMD_TEST_EVERYTHING}" +echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}" +echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}" cargo_test() { cmd="cargo" @@ -49,7 +49,7 @@ cargo_test() { CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml" STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml" -STDSIMD_EXAMPLES="--manifest-path=examples/Cargo.toml" +STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml" cargo_test "${CORE_ARCH} --release" if [ "$NOSTD" != "1" ]; then @@ -61,14 +61,14 @@ if [ "$NOSTD" != "1" ]; then cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval" cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io" - cargo_test "${STDSIMD_EXAMPLES}" - cargo_test "${STDSIMD_EXAMPLES} --release" + cargo_test "${STDARCH_EXAMPLES}" + cargo_test "${STDARCH_EXAMPLES} --release" fi # Test targets compiled with extra features. 
case ${TARGET} in x86*) - export STDSIMD_DISABLE_ASSERT_INSTR=1 + export STDARCH_DISABLE_ASSERT_INSTR=1 export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" cargo_test "--release" ;; diff --git a/library/stdarch/crates/assert-instr-macro/src/lib.rs b/library/stdarch/crates/assert-instr-macro/src/lib.rs index 61986411211..0e739605846 100644 --- a/library/stdarch/crates/assert-instr-macro/src/lib.rs +++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs @@ -1,6 +1,6 @@ //! Implementation of the `#[assert_instr]` macro //! -//! This macro is used when testing the `stdsimd` crate and is used to generate +//! This macro is used when testing the `stdarch` crate and is used to generate //! test cases to assert that functions do indeed contain the instructions that //! we're expecting them to contain. //! @@ -41,7 +41,7 @@ pub fn assert_instr( // Disable assert_instr for x86 targets compiled with avx enabled, which // causes LLVM to generate different intrinsics that the ones we are // testing for. - let disable_assert_instr = std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok(); + let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok(); // If instruction tests are disabled avoid emitting this shim at all, just // return the original item without our attribute. @@ -57,7 +57,7 @@ pub fn assert_instr( let assert_name = syn::Ident::new(&format!("assert_{}_{}", name, instr_str), name.span()); // These name has to be unique enough for us to find it in the disassembly later on: let shim_name = syn::Ident::new( - &format!("stdsimd_test_shim_{}_{}", name, instr_str), + &format!("stdarch_test_shim_{}_{}", name, instr_str), name.span(), ); let mut inputs = Vec::new(); @@ -123,7 +123,7 @@ pub fn assert_instr( // generate some code that's hopefully very tight in terms of // codegen but is otherwise unique to prevent code from being // folded. - ::stdsimd_test::_DONT_DEDUP.store( + ::stdarch_test::_DONT_DEDUP.store( std::mem::transmute(#shim_name_str.as_bytes().as_ptr()), std::sync::atomic::Ordering::Relaxed, ); @@ -142,7 +142,7 @@ pub fn assert_instr( // code: unsafe { asm!("" : : "r"(#shim_name as usize) : "memory" : "volatile") }; - ::stdsimd_test::assert(#shim_name as usize, + ::stdarch_test::assert(#shim_name as usize, stringify!(#shim_name), #instr); } diff --git a/library/stdarch/crates/core_arch/Cargo.toml b/library/stdarch/crates/core_arch/Cargo.toml index e3fe73c4692..c67e3e15c9c 100644 --- a/library/stdarch/crates/core_arch/Cargo.toml +++ b/library/stdarch/crates/core_arch/Cargo.toml @@ -8,8 +8,8 @@ authors = [ ] description = "`core::arch` - Rust's core library architecture-specific intrinsics." 
documentation = "https://docs.rs/core_arch" -homepage = "https://github.com/rust-lang-nursery/stdsimd" -repository = "https://github.com/rust-lang-nursery/stdsimd" +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" readme = "README.md" keywords = ["core", "simd", "arch", "intrinsics"] categories = ["hardware-support", "no-std"] @@ -18,14 +18,14 @@ build = "build.rs" edition = "2018" [badges] -travis-ci = { repository = "rust-lang-nursery/stdsimd" } -appveyor = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" } +travis-ci = { repository = "rust-lang/stdarch" } +appveyor = { repository = "rust-lang/stdarch" } +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } maintenance = { status = "experimental" } [dev-dependencies] -stdsimd-test = { version = "0.*", path = "../stdsimd-test" } +stdarch-test = { version = "0.*", path = "../stdarch-test" } std_detect = { version = "0.*", path = "../std_detect" } [target.wasm32-unknown-unknown.dev-dependencies] diff --git a/library/stdarch/crates/core_arch/README.md b/library/stdarch/crates/core_arch/README.md index f8d10b4d620..89f03b7527d 100644 --- a/library/stdarch/crates/core_arch/README.md +++ b/library/stdarch/crates/core_arch/README.md @@ -39,14 +39,14 @@ are: * [How to get started][contrib] * [How to help implement intrinsics][help-implement] -[contrib]: https://github.com/rust-lang-nursery/stdsimd/blob/master/CONTRIBUTING.md -[help-implement]: https://github.com/rust-lang-nursery/stdsimd/issues/40 -[i686]: https://rust-lang-nursery.github.io/stdsimd/i686/core_arch/ -[x86_64]: https://rust-lang-nursery.github.io/stdsimd/x86_64/core_arch/ -[arm]: https://rust-lang-nursery.github.io/stdsimd/arm/core_arch/ -[aarch64]: https://rust-lang-nursery.github.io/stdsimd/aarch64/core_arch/ -[powerpc]: https://rust-lang-nursery.github.io/stdsimd/powerpc/core_arch/ -[powerpc64]: https://rust-lang-nursery.github.io/stdsimd/powerpc64/core_arch/ +[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md +[help-implement]: https://github.com/rust-lang/stdarch/issues/40 +[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/ +[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/ +[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/ +[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/ +[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/ +[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/ # License @@ -62,9 +62,9 @@ Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in `core_arch` by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
-[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd -[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master -[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master +[travis]: https://travis-ci.com/rust-lang/stdarch +[Travis-CI Status]: https://travis-ci.com/rust-lang/stdarch.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdarch/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg [core_arch_crate_link]: https://crates.io/crates/core_arch diff --git a/library/stdarch/crates/core_arch/src/aarch64/crc.rs b/library/stdarch/crates/core_arch/src/aarch64/crc.rs index 18307d295d4..278a7856271 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/crc.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/crc.rs @@ -19,7 +19,7 @@ extern "C" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// CRC32 single round checksum for bytes (8 bits). #[inline] @@ -89,7 +89,7 @@ pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "crc")] unsafe fn test_crc32b() { diff --git a/library/stdarch/crates/core_arch/src/aarch64/crypto.rs b/library/stdarch/crates/core_arch/src/aarch64/crypto.rs index 5d46070b233..91269d8052f 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/crypto.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/crypto.rs @@ -35,7 +35,7 @@ extern "C" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// AES single round encryption. #[inline] @@ -165,7 +165,7 @@ pub unsafe fn vsha256su1q_u32( mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "crypto")] unsafe fn test_vaeseq_u8() { diff --git a/library/stdarch/crates/core_arch/src/aarch64/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/mod.rs index 0f2f9da11a3..e33dc7eaf5b 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/mod.rs @@ -21,7 +21,7 @@ pub use self::crc::*; pub use super::acle::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `BRK 1` #[cfg_attr(test, assert_instr(brk))] diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon.rs b/library/stdarch/crates/core_arch/src/aarch64/neon.rs index b96f7ac3cfe..46af4567b3b 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon.rs @@ -9,7 +9,7 @@ use crate::{ mem::{transmute, zeroed}, }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; types! { /// ARM-specific 64-bit wide vector of one packed `f64`. 
@@ -1546,7 +1546,7 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem::transmute; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "neon")] unsafe fn test_vadd_f64() { diff --git a/library/stdarch/crates/core_arch/src/aarch64/v8.rs b/library/stdarch/crates/core_arch/src/aarch64/v8.rs index f44f43d34f4..778721c6851 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/v8.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/v8.rs @@ -6,7 +6,7 @@ //! ddi0487a.k_10775/index.html #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Reverse the order of the bytes. #[inline] diff --git a/library/stdarch/crates/core_arch/src/acle/dsp.rs b/library/stdarch/crates/core_arch/src/acle/dsp.rs index c87363ea030..03cc082697a 100644 --- a/library/stdarch/crates/core_arch/src/acle/dsp.rs +++ b/library/stdarch/crates/core_arch/src/acle/dsp.rs @@ -21,7 +21,7 @@ //! - \[x\] __smlawt #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::mem::transmute; @@ -241,7 +241,7 @@ mod tests { simd::{i16x2, i8x4, u8x4}, }; use std::{i32, mem::transmute}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[test] fn smulbb() { diff --git a/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs b/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs index 5cc66308be2..e0b71218a7f 100644 --- a/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs +++ b/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs @@ -3,7 +3,7 @@ pub struct APSR; // Note (@Lokathor): Because this breaks the use of Rust on the Game Boy // Advance, this change must be reverted until Rust learns to handle cpu state -// properly. See also: https://github.com/rust-lang-nursery/stdsimd/issues/702 +// properly. See also: https://github.com/rust-lang/stdarch/issues/702 //#[cfg(any(not(target_feature = "thumb-state"), target_feature = "v6t2"))] //rsr!(APSR); diff --git a/library/stdarch/crates/core_arch/src/acle/simd32.rs b/library/stdarch/crates/core_arch/src/acle/simd32.rs index fe5a818fd09..678324f1065 100644 --- a/library/stdarch/crates/core_arch/src/acle/simd32.rs +++ b/library/stdarch/crates/core_arch/src/acle/simd32.rs @@ -63,7 +63,7 @@ //! - \[x\] __smusdx #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::{core_arch::acle::dsp::int16x2_t, mem::transmute}; @@ -465,7 +465,7 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { mod tests { use crate::core_arch::simd::{i16x2, i8x4, u8x4}; use std::{i16, i8, mem::transmute}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[test] fn qadd8() { diff --git a/library/stdarch/crates/core_arch/src/arm/armclang.rs b/library/stdarch/crates/core_arch/src/arm/armclang.rs index 36a3a2fe9a3..2e0a82ade3e 100644 --- a/library/stdarch/crates/core_arch/src/arm/armclang.rs +++ b/library/stdarch/crates/core_arch/src/arm/armclang.rs @@ -7,7 +7,7 @@ //! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Inserts a breakpoint instruction. 
/// diff --git a/library/stdarch/crates/core_arch/src/arm/mod.rs b/library/stdarch/crates/core_arch/src/arm/mod.rs index 6829a8523c4..a467c2ce854 100644 --- a/library/stdarch/crates/core_arch/src/arm/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm/mod.rs @@ -37,7 +37,7 @@ pub use self::neon::*; pub use crate::core_arch::acle::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `UDF` #[cfg(target_arch = "arm")] diff --git a/library/stdarch/crates/core_arch/src/arm/neon.rs b/library/stdarch/crates/core_arch/src/arm/neon.rs index 99097b229de..73391c84ebe 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon.rs @@ -2,7 +2,7 @@ use crate::{core_arch::simd_llvm::*, mem::transmute}; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. @@ -978,7 +978,7 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t mod tests { use crate::core_arch::{arm::*, simd::*}; use std::{i16, i32, i8, mem::transmute, u16, u32, u8}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "neon")] unsafe fn test_vadd_s8() { diff --git a/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs b/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs index 5b8b4878f6c..15aa2f2695d 100644 --- a/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs +++ b/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs @@ -12,7 +12,7 @@ use crate::core_arch::arm::*; use crate::core_arch::simd::*; use std::mem; -use stdsimd_test::simd_test; +use stdarch_test::simd_test; macro_rules! test_vtbl { ($test_name:ident => $fn_id:ident: diff --git a/library/stdarch/crates/core_arch/src/arm/v6.rs b/library/stdarch/crates/core_arch/src/arm/v6.rs index c7ec7d411df..5df30cd623b 100644 --- a/library/stdarch/crates/core_arch/src/arm/v6.rs +++ b/library/stdarch/crates/core_arch/src/arm/v6.rs @@ -7,7 +7,7 @@ //! html #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Reverse the order of the bytes. #[inline] diff --git a/library/stdarch/crates/core_arch/src/arm/v7.rs b/library/stdarch/crates/core_arch/src/arm/v7.rs index 9ee4a461827..e7507f9b953 100644 --- a/library/stdarch/crates/core_arch/src/arm/v7.rs +++ b/library/stdarch/crates/core_arch/src/arm/v7.rs @@ -10,13 +10,13 @@ pub use super::v6::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Count Leading Zeros. 
#[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 +// FIXME: https://github.com/rust-lang/stdarch/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u8(x: u8) -> u8 { x.leading_zeros() as u8 @@ -26,7 +26,7 @@ pub unsafe fn _clz_u8(x: u8) -> u8 { #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 +// FIXME: https://github.com/rust-lang/stdarch/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u16(x: u16) -> u16 { x.leading_zeros() as u16 @@ -36,7 +36,7 @@ pub unsafe fn _clz_u16(x: u16) -> u16 { #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 +// FIXME: https://github.com/rust-lang/stdarch/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u32(x: u32) -> u32 { x.leading_zeros() as u32 diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index 424a185f88e..3be5859671c 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -1,5 +1,4 @@ #![doc(include = "core_arch_docs.md")] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(dead_code)] #![allow(unused_features)] #![feature( @@ -65,7 +64,7 @@ extern crate std; #[macro_use] extern crate std_detect; #[cfg(test)] -extern crate stdsimd_test; +extern crate stdarch_test; #[cfg(all(test, target_arch = "wasm32"))] extern crate wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/mips/mod.rs b/library/stdarch/crates/core_arch/src/mips/mod.rs index d7ab34f5c3f..efde97f4df7 100644 --- a/library/stdarch/crates/core_arch/src/mips/mod.rs +++ b/library/stdarch/crates/core_arch/src/mips/mod.rs @@ -4,7 +4,7 @@ mod msa; pub use self::msa::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `BREAK` #[cfg_attr(test, assert_instr(break))] diff --git a/library/stdarch/crates/core_arch/src/mips/msa.rs b/library/stdarch/crates/core_arch/src/mips/msa.rs index 18e1e78d983..46c9bd089af 100644 --- a/library/stdarch/crates/core_arch/src/mips/msa.rs +++ b/library/stdarch/crates/core_arch/src/mips/msa.rs @@ -6,7 +6,7 @@ //! 
[msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::mem; @@ -9235,7 +9235,7 @@ mod tests { mem, }; use std::{f32, f64}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "msa")] unsafe fn test_msa_add_a_b() { diff --git a/library/stdarch/crates/core_arch/src/mod.rs b/library/stdarch/crates/core_arch/src/mod.rs index 940f6398614..88603024b95 100644 --- a/library/stdarch/crates/core_arch/src/mod.rs +++ b/library/stdarch/crates/core_arch/src/mod.rs @@ -10,7 +10,7 @@ mod simd; #[cfg_attr( bootstrap, - doc(include = "../stdsimd/crates/core_arch/src/core_arch_docs.md") + doc(include = "../stdarch/crates/core_arch/src/core_arch_docs.md") )] #[cfg_attr(not(bootstrap), doc(include = "core_arch_docs.md"))] #[stable(feature = "simd_arch", since = "1.27.0")] diff --git a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs index 2bcec9374b8..2f56ecf8f3d 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs @@ -19,7 +19,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; types! { /// PowerPC-specific 128-bit wide vector of sixteen packed `i8` @@ -1714,7 +1714,7 @@ mod tests { use std::mem::transmute; use crate::core_arch::simd::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; macro_rules! test_vec_2 { { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { diff --git a/library/stdarch/crates/core_arch/src/powerpc/mod.rs b/library/stdarch/crates/core_arch/src/powerpc/mod.rs index 4bfee70e1fc..9765d11d1f4 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/mod.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/mod.rs @@ -9,7 +9,7 @@ mod vsx; pub use self::vsx::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `TRAP` #[cfg_attr(test, assert_instr(trap))] diff --git a/library/stdarch/crates/core_arch/src/powerpc/vsx.rs b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs index 77b7306b027..394c9a7043b 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/vsx.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs @@ -11,7 +11,7 @@ use crate::core_arch::simd_llvm::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::mem; @@ -93,7 +93,7 @@ mod tests { use super::mem; use crate::core_arch::simd::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; macro_rules! 
test_vec_xxpermdi { {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { diff --git a/library/stdarch/crates/core_arch/src/wasm32/atomic.rs b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs index 4ebbaa19bd2..b8ffaeac0e2 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/atomic.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs @@ -9,7 +9,7 @@ #![cfg(any(target_feature = "atomics", dox))] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/wasm32/memory.rs b/library/stdarch/crates/core_arch/src/wasm32/memory.rs index 0ccc104c63b..f2c7fa54c3d 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/memory.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/memory.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/wasm32/mod.rs b/library/stdarch/crates/core_arch/src/wasm32/mod.rs index d95eb0890da..5e7a9d85f4e 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/mod.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/mod.rs @@ -1,7 +1,7 @@ //! WASM32 intrinsics #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs index 7bf579f38e3..99d02a0a441 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -13,7 +13,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/x86/abm.rs b/library/stdarch/crates/core_arch/src/x86/abm.rs index 9ee69f46e66..50912f77423 100644 --- a/library/stdarch/crates/core_arch/src/x86/abm.rs +++ b/library/stdarch/crates/core_arch/src/x86/abm.rs @@ -18,7 +18,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Counts the leading most significant zero bits. 
/// @@ -46,7 +46,7 @@ pub unsafe fn _popcnt32(x: i32) -> i32 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index 3bcb4d8ba25..6df321c049b 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { @@ -48,7 +48,7 @@ pub unsafe fn _subborrow_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/aes.rs b/library/stdarch/crates/core_arch/src/x86/aes.rs index bc45fc39da5..603744aef6e 100644 --- a/library/stdarch/crates/core_arch/src/x86/aes.rs +++ b/library/stdarch/crates/core_arch/src/x86/aes.rs @@ -10,7 +10,7 @@ use crate::core_arch::x86::__m128i; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -111,7 +111,7 @@ mod tests { // __m128i happens to be defined in terms of signed integers. #![allow(overflowing_literals)] - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index 180b6e72387..56067ba5c93 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -21,7 +21,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Adds packed double-precision (64-bit) floating-point elements /// in `a` and `b`. @@ -54,7 +54,7 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { #[inline] #[target_feature(enable = "avx")] // FIXME: Should be 'vandpd' instuction. -// See https://github.com/rust-lang-nursery/stdsimd/issues/71 +// See https://github.com/rust-lang/stdarch/issues/71 #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { @@ -84,7 +84,7 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { #[inline] #[target_feature(enable = "avx")] // FIXME: should be `vorpd` instuction. -// See . +// See . #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { @@ -3309,7 +3309,7 @@ extern "C" { #[cfg(test)] mod tests { use crate::hint::black_box; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index fcc1fdfd087..84f3364b922 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -24,7 +24,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. 
/// @@ -4057,7 +4057,7 @@ extern "C" { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index ad1a5a40704..f38583102b9 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -4,7 +4,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. /// @@ -97,7 +97,7 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs b/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs index ad3a21a796c..425d0ff7e5e 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs @@ -1,7 +1,7 @@ use crate::core_arch::x86::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit @@ -106,7 +106,7 @@ extern "C" { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs index f9df86b50a3..0f769f33b0f 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi1.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -10,7 +10,7 @@ //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. @@ -118,7 +118,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/bmi2.rs b/library/stdarch/crates/core_arch/src/x86/bmi2.rs index b709d1187d3..b08b8733c2f 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi2.rs @@ -11,7 +11,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Unsigned multiply without affecting flags. 
/// @@ -77,7 +77,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs index 2896781f847..20e3aa6fc35 100644 --- a/library/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -2,7 +2,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns an integer with the reversed byte order of x /// diff --git a/library/stdarch/crates/core_arch/src/x86/bt.rs b/library/stdarch/crates/core_arch/src/x86/bt.rs index 8ecc87fe4cb..6e42828dd7c 100644 --- a/library/stdarch/crates/core_arch/src/x86/bt.rs +++ b/library/stdarch/crates/core_arch/src/x86/bt.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns the bit in position `b` of the memory addressed by `p`. #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index c52157599c6..32a13b532e5 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -2,7 +2,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Result of the `cpuid` instruction. #[allow(clippy::missing_inline_in_public_items)] diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs index 195485914b3..503bd41d2fd 100644 --- a/library/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -9,7 +9,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { @@ -110,7 +110,7 @@ pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i { #[cfg(test)] mod tests { use crate::{core_arch::x86::*, mem::transmute}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "f16c")] unsafe fn test_mm_cvtph_ps() { diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs index de4e589fe14..48abe9f49a9 100644 --- a/library/stdarch/crates/core_arch/src/x86/fma.rs +++ b/library/stdarch/crates/core_arch/src/x86/fma.rs @@ -21,7 +21,7 @@ use crate::core_arch::x86::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. @@ -508,7 +508,7 @@ extern "C" { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/fxsr.rs b/library/stdarch/crates/core_arch/src/x86/fxsr.rs index eeca495e0b4..83d53f4773d 100644 --- a/library/stdarch/crates/core_arch/src/x86/fxsr.rs +++ b/library/stdarch/crates/core_arch/src/x86/fxsr.rs @@ -1,7 +1,7 @@ //! FXSR floating-point context fast save and restor. 
#[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -59,7 +59,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) { mod tests { use crate::core_arch::x86::*; use std::{cmp::PartialEq, fmt}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[repr(align(16))] struct FxsaveArea { diff --git a/library/stdarch/crates/core_arch/src/x86/mmx.rs b/library/stdarch/crates/core_arch/src/x86/mmx.rs index 96a3c2d881d..ff4f8277e25 100644 --- a/library/stdarch/crates/core_arch/src/x86/mmx.rs +++ b/library/stdarch/crates/core_arch/src/x86/mmx.rs @@ -14,7 +14,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Constructs a 64-bit integer vector initialized to zero. #[inline] @@ -532,7 +532,7 @@ extern "C" { #[cfg(test)] mod tests { use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "mmx")] unsafe fn test_mm_setzero_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs index bed9e4a020e..68965ad93de 100644 --- a/library/stdarch/crates/core_arch/src/x86/mod.rs +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -519,14 +519,14 @@ pub use self::bmi1::*; mod bmi2; pub use self::bmi2::*; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] mod sse4a; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] pub use self::sse4a::*; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] mod tbm; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] pub use self::tbm::*; mod mmx; @@ -548,7 +548,7 @@ mod adx; pub use self::adx::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `UD2` #[cfg_attr(test, assert_instr(ud2))] diff --git a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs index 9b9d21c9add..0e1bebae9ee 100644 --- a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs +++ b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs @@ -8,7 +8,7 @@ use crate::core_arch::x86::__m128i; #[cfg(test)] -use crate::stdsimd_test::assert_instr; +use crate::stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -48,7 +48,7 @@ mod tests { // __m128i happens to be defined in terms of signed integers. #![allow(overflowing_literals)] - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index a1252933522..c6bab914892 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -16,7 +16,7 @@ extern "unadjusted" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Read a hardware generated 16-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. diff --git a/library/stdarch/crates/core_arch/src/x86/rdtsc.rs b/library/stdarch/crates/core_arch/src/x86/rdtsc.rs index a92ff4b3ece..67f6e48fa7b 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdtsc.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdtsc.rs @@ -1,7 +1,7 @@ //! RDTSC instructions. #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Reads the current value of the processor’s time-stamp counter. 
/// @@ -60,7 +60,7 @@ extern "C" { #[cfg(test)] mod tests { use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse2")] unsafe fn _rdtsc() { diff --git a/library/stdarch/crates/core_arch/src/x86/rtm.rs b/library/stdarch/crates/core_arch/src/x86/rtm.rs index ebe3ed80da7..1e532b4efe7 100644 --- a/library/stdarch/crates/core_arch/src/x86/rtm.rs +++ b/library/stdarch/crates/core_arch/src/x86/rtm.rs @@ -14,7 +14,7 @@ //! [intel_consid]: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-intel-transactional-synchronization-extensions-intel-tsx-programming-considerations #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; extern "C" { #[link_name = "llvm.x86.xbegin"] @@ -107,7 +107,7 @@ pub const fn _xabort_code(status: u32) -> u32 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/sha.rs b/library/stdarch/crates/core_arch/src/x86/sha.rs index c6cbc5324f5..362a97ccd36 100644 --- a/library/stdarch/crates/core_arch/src/x86/sha.rs +++ b/library/stdarch/crates/core_arch/src/x86/sha.rs @@ -22,7 +22,7 @@ extern "C" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Performs an intermediate calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, @@ -141,7 +141,7 @@ mod tests { core_arch::{simd::*, x86::*}, hint::black_box, }; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sha")] #[allow(overflowing_literals)] diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index f03773d1ea0..3160ac57b80 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Adds the first component of `a` and `b`, the other components are copied /// from `a`. @@ -978,7 +978,7 @@ pub unsafe fn _mm_setzero_ps() -> __m128 { /// permute intrinsics. #[inline] #[allow(non_snake_case)] -#[unstable(feature = "stdsimd", issue = "27731")] +#[unstable(feature = "stdarch", issue = "27731")] pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { ((z << 6) | (y << 4) | (x << 2) | w) as i32 } @@ -2499,7 +2499,7 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { mod tests { use crate::{hint::black_box, mem::transmute}; use std::{boxed, f32::NAN}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::{simd::*, x86::*}; diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index 2c621b21fc7..21a3b4a0816 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -1,7 +1,7 @@ //! 
Streaming SIMD Extensions 2 (SSE2) #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, @@ -3193,7 +3193,7 @@ mod tests { i32, mem::{self, transmute}, }; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[test] fn test_mm_pause() { diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs index 9e335e12c8d..977de1dc17a 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs @@ -10,7 +10,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Alternatively add and subtract packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. @@ -165,7 +165,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 2e0a0acb461..f8616296718 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; // SSE4 rounding constans /// round to nearest @@ -1194,7 +1194,7 @@ extern "C" { mod tests { use crate::core_arch::x86::*; use std::mem; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blendv_epi8() { diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs index eca71233ad5..d4d8aa644e4 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs @@ -3,7 +3,7 @@ //! Extends SSE4.1 with STTNI (String and Text New Instructions). #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, @@ -707,7 +707,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; use std::ptr; diff --git a/library/stdarch/crates/core_arch/src/x86/sse4a.rs b/library/stdarch/crates/core_arch/src/x86/sse4a.rs index 51401e783f7..e6345d0da90 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse4a.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse4a.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -84,7 +84,7 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { #[cfg(test)] mod tests { use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4a")] unsafe fn test_mm_extract_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86/ssse3.rs b/library/stdarch/crates/core_arch/src/x86/ssse3.rs index 07eb620534c..6a45603e441 100644 --- a/library/stdarch/crates/core_arch/src/x86/ssse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/ssse3.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Computes the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. 
@@ -560,7 +560,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/tbm.rs b/library/stdarch/crates/core_arch/src/x86/tbm.rs index dd8f800c68a..d1102a11693 100644 --- a/library/stdarch/crates/core_arch/src/x86/tbm.rs +++ b/library/stdarch/crates/core_arch/src/x86/tbm.rs @@ -11,7 +11,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; // FIXME(blocked on #248) // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: @@ -279,7 +279,7 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index 124cb39b78d..f3814dd7f62 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -2,7 +2,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -165,7 +165,7 @@ mod tests { use std::{fmt, prelude::v1::*}; use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[repr(align(64))] struct XsaveArea { @@ -209,7 +209,7 @@ mod tests { } } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 /* #[simd_test(enable = "xsave")] unsafe fn xsave() { @@ -237,7 +237,7 @@ mod tests { assert_eq!(xcr, xcr_cpy); } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 /* #[simd_test(enable = "xsave,xsaveopt")] unsafe fn xsaveopt() { @@ -253,7 +253,7 @@ mod tests { */ // FIXME: this looks like a bug in Intel's SDE: - #[cfg(not(stdsimd_intel_sde))] + #[cfg(not(stdarch_intel_sde))] #[simd_test(enable = "xsave,xsavec")] unsafe fn xsavec() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers @@ -266,7 +266,7 @@ mod tests { assert_eq!(a, b); } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 /* #[simd_test(enable = "xsave,xsaves")] unsafe fn xsaves() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/abm.rs b/library/stdarch/crates/core_arch/src/x86_64/abm.rs index b5a2b267a6e..988074d673d 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/abm.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/abm.rs @@ -18,7 +18,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Counts the leading most significant zero bits. 
/// @@ -46,7 +46,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::arch::x86_64::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs index d1d295d7728..57efe75ddd7 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { @@ -48,7 +48,7 @@ pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86_64::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs index 082913e9353..5215fd6fbf3 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs @@ -33,7 +33,7 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs index f31c018f547..7cc3fb6efec 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs @@ -36,7 +36,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 { #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "avx2")] unsafe fn test_mm256_extract_epi64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs index 6e1296d5fed..9f71a8d3885 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -10,7 +10,7 @@ //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. @@ -123,7 +123,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::{x86::*, x86_64::*}; diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs index 99b7fa0256e..356d95a3d13 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs @@ -11,7 +11,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Unsigned multiply without affecting flags. 
/// @@ -79,7 +79,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86_64::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index 08bf1d2f87b..9e8e76d4f70 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -3,7 +3,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns an integer with the reversed byte order of x /// diff --git a/library/stdarch/crates/core_arch/src/x86_64/bt.rs b/library/stdarch/crates/core_arch/src/x86_64/bt.rs index c6de6b28d5c..9c6dcf7b61b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bt.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bt.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns the bit in position `b` of the memory addressed by `p`. #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index 3c4d4f6421e..391daed20ee 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -1,7 +1,7 @@ use crate::sync::atomic::Ordering; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Compares and exchange 16 bytes (128 bits) of data atomically. /// diff --git a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs index 543ed6dcee0..0b26fb6d04f 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs @@ -1,7 +1,7 @@ //! FXSR floating-point context fast save and restor. #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -59,7 +59,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) { mod tests { use crate::core_arch::x86_64::*; use std::{cmp::PartialEq, fmt}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[repr(align(16))] struct FxsaveArea { diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 7b6d0de01f8..e5ec933fb93 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -13,7 +13,7 @@ extern "unadjusted" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Read a hardware generated 64-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. 
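All of the `x86_64` hunks above (and below) follow the same pattern as the 32-bit modules: only the test-only import switches from `stdsimd_test` to `stdarch_test`, while the `#[cfg_attr(test, assert_instr(..))]` usage sites stay untouched. A minimal sketch of what an intrinsic module looks like after the rename — the function and instruction names here are illustrative, modeled on the `arm/v7.rs` hunk earlier in this patch, not a new addition of this commit:

    // Pulled in only when building the crate's own tests.
    #[cfg(test)]
    use stdarch_test::assert_instr;

    /// Illustrative wrapper: the attribute below makes the test harness
    /// disassemble the generated test shim and assert that it contains
    /// the named instruction.
    #[inline]
    #[cfg_attr(test, assert_instr(clz))]
    pub unsafe fn example_clz_u32(x: u32) -> u32 {
        x.leading_zeros()
    }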
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs index 066d22fa664..a93215072ab 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs @@ -3,7 +3,7 @@ use crate::core_arch::x86::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -72,7 +72,7 @@ pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { mod tests { use crate::core_arch::arch::x86_64::*; use std::{f32::NAN, i64::MIN}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse")] unsafe fn test_mm_cvtss_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs index 37bfc45cf82..94a919d4fe7 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -149,7 +149,7 @@ pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { mod tests { use crate::core_arch::arch::x86_64::*; use std::{boxed, f64, i64}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs index 36e498888ab..18c315c90fe 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Extracts an 64-bit integer from `a` selected with `imm8` /// @@ -37,7 +37,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_epi64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs index 03db7aca384..405073261ce 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs @@ -1,7 +1,7 @@ //! 
`x86_64`'s Streaming SIMD Extensions 4.2 (SSE4.2) #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -25,7 +25,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { mod tests { use crate::core_arch::arch::x86_64::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 964a24d12e2..8296695058b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -3,7 +3,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -124,16 +124,16 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) { xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 +// FIXME: https://github.com/rust-lang/stdarch/issues/209 // All these tests fail with Intel SDE. /* #[cfg(test)] mod tests { use crate::core_arch::x86::x86_64::xsave; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use std::fmt; - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 #[repr(align(64))] struct XsaveArea { // max size for 256-bit registers is 800 bytes: diff --git a/library/stdarch/crates/core_arch/tests/cpu-detection.rs b/library/stdarch/crates/core_arch/tests/cpu-detection.rs index 321f24e9fc2..07b56c616d6 100644 --- a/library/stdarch/crates/core_arch/tests/cpu-detection.rs +++ b/library/stdarch/crates/core_arch/tests/cpu-detection.rs @@ -1,5 +1,4 @@ #![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/library/stdarch/crates/simd-test-macro/src/lib.rs b/library/stdarch/crates/simd-test-macro/src/lib.rs index 67f3059ebee..4d1170cc7e9 100644 --- a/library/stdarch/crates/simd-test-macro/src/lib.rs +++ b/library/stdarch/crates/simd-test-macro/src/lib.rs @@ -126,7 +126,7 @@ pub fn simd_test( #emms return v; } else { - ::stdsimd_test::assert_skip_test_ok(stringify!(#name)); + ::stdarch_test::assert_skip_test_ok(stringify!(#name)); } #[target_feature(enable = #enable_feature)] diff --git a/library/stdarch/crates/std_detect/Cargo.toml b/library/stdarch/crates/std_detect/Cargo.toml index ae633566591..8902868c623 100644 --- a/library/stdarch/crates/std_detect/Cargo.toml +++ b/library/stdarch/crates/std_detect/Cargo.toml @@ -8,8 +8,8 @@ authors = [ ] description = "`std::detect` - Rust's standard library run-time CPU feature detection." 
documentation = "https://docs.rs/std_detect" -homepage = "https://github.com/rust-lang-nursery/stdsimd" -repository = "https://github.com/rust-lang-nursery/stdsimd" +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" readme = "README.md" keywords = ["std", "run-time", "feature", "detection"] categories = ["hardware-support"] @@ -17,10 +17,10 @@ license = "MIT/Apache-2.0" edition = "2015" [badges] -travis-ci = { repository = "rust-lang-nursery/stdsimd" } -appveyor = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" } +travis-ci = { repository = "rust-lang/stdarch" } +appveyor = { repository = "rust-lang/stdarch" } +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } maintenance = { status = "experimental" } [dependencies] diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md index 4d2ec7d3448..1c83a54b80d 100644 --- a/library/stdarch/crates/std_detect/README.md +++ b/library/stdarch/crates/std_detect/README.md @@ -75,9 +75,9 @@ Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in `std_detect` by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. -[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd -[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master -[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master +[travis]: https://travis-ci.com/rust-lang/stdarch +[Travis-CI Status]: https://travis-ci.com/rust-lang/stdarch.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdarch/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [std_detect_crate_badge]: https://img.shields.io/crates/v/std_detect.svg [std_detect_crate_link]: https://crates.io/crates/std_detect diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index 8b3bb304f1c..7737719c3b0 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -17,7 +17,6 @@ #![deny(clippy::missing_inline_in_public_items)] #![cfg_attr(target_os = "linux", feature(linkage))] #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![cfg_attr(test, allow(unused_imports))] #![no_std] diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index 0aae39e2947..d0c9901c4cd 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -1,5 +1,4 @@ #![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs index df03e6555aa..e950523d0e5 100644 --- a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -1,5 +1,4 @@ 
#![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( diff --git a/library/stdarch/crates/stdarch-test/Cargo.toml b/library/stdarch/crates/stdarch-test/Cargo.toml new file mode 100644 index 00000000000..2b445f8dc5b --- /dev/null +++ b/library/stdarch/crates/stdarch-test/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "stdarch-test" +version = "0.1.0" +authors = ["Alex Crichton "] + +[dependencies] +assert-instr-macro = { path = "../assert-instr-macro" } +simd-test-macro = { path = "../simd-test-macro" } +cc = "1.0" +lazy_static = "1.0" +rustc-demangle = "0.1.8" +cfg-if = "0.1" + +[target.wasm32-unknown-unknown.dependencies] +wasm-bindgen = "0.2.47" +js-sys = "0.3" +console_error_panic_hook = "0.1" + +[features] +default = [] diff --git a/library/stdarch/crates/stdarch-test/src/disassembly.rs b/library/stdarch/crates/stdarch-test/src/disassembly.rs new file mode 100644 index 00000000000..23ebd92e740 --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/disassembly.rs @@ -0,0 +1,182 @@ +//! Disassembly calling function for most targets. + +use std::{env, collections::HashSet, process::Command, str}; +use crate::Function; + +// Extracts the "shim" name from the `symbol`. +fn normalize(mut symbol: &str) -> String { + // Remove trailing colon: + if symbol.ends_with(':') { + symbol = &symbol[..symbol.len() - 1]; + } + if symbol.ends_with('>') { + symbol = &symbol[..symbol.len() - 1]; + } + if let Some(idx) = symbol.find('<') { + symbol = &symbol[idx + 1..]; + } + + let mut symbol = rustc_demangle::demangle(symbol).to_string(); + symbol = match symbol.rfind("::h") { + Some(i) => symbol[..i].to_string(), + None => symbol.to_string(), + }; + + // Remove Rust paths + if let Some(last_colon) = symbol.rfind(':') { + symbol = (&symbol[last_colon + 1..]).to_string(); + } + + // Normalize to no leading underscore to handle platforms that may + // inject extra ones in symbol names. + while symbol.starts_with('_') { + symbol.remove(0); + } + symbol +} + +pub(crate) fn disassemble_myself() -> HashSet { + let me = env::current_exe().expect("failed to get current exe"); + + let disassembly = if cfg!(target_arch = "x86_64") + && cfg!(target_os = "windows") + && cfg!(target_env = "msvc") + { + let mut cmd = cc::windows_registry::find( + "x86_64-pc-windows-msvc", + "dumpbin.exe", + ).expect("failed to find `dumpbin` tool"); + let output = cmd + .arg("/DISASM") + .arg(&me) + .output() + .expect("failed to execute dumpbin"); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + String::from_utf8(output.stdout) + } else if cfg!(target_os = "windows") { + panic!("disassembly unimplemented") + } else if cfg!(target_os = "macos") { + let output = Command::new("otool") + .arg("-vt") + .arg(&me) + .output() + .expect("failed to execute otool"); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + + String::from_utf8(output.stdout) + } else { + let objdump = + env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string()); + let output = Command::new(objdump.clone()) + .arg("--disassemble") + .arg(&me) + .output() + .unwrap_or_else(|_| panic!( + "failed to execute objdump. 
OBJDUMP={}", + objdump + )); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + + String::from_utf8(output.stdout) + }.expect("failed to convert to utf8"); + + parse(&disassembly) +} + +fn parse(output: &str) -> HashSet { + let mut lines = output.lines(); + + for line in output.lines().take(100) { + println!("{}", line); + } + + let mut functions = HashSet::new(); + let mut cached_header = None; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { + if !header.ends_with(':') || !header.contains("stdarch_test_shim") { + continue + } + let symbol = normalize(header); + let mut instructions = Vec::new(); + while let Some(instruction) = lines.next() { + if instruction.ends_with(':') { + cached_header = Some(instruction); + break; + } + if instruction.is_empty() { + cached_header = None; + break; + } + let parts = if cfg!(target_os = "macos") { + // Each line of instructions should look like: + // + // $addr $instruction... + instruction + .split_whitespace() + .skip(1) + .map(std::string::ToString::to_string) + .collect::>() + } else if cfg!(target_env = "msvc") { + // Each line looks like: + // + // > $addr: ab cd ef $instr.. + // > 00 12 # this line os optional + if instruction.starts_with(" ") { + continue; + } + instruction + .split_whitespace() + .skip(1) + .skip_while(|s| { + s.len() == 2 && usize::from_str_radix(s, 16).is_ok() + }).map(std::string::ToString::to_string) + .skip_while(|s| *s == "lock") // skip x86-specific prefix + .collect::>() + } else { + // objdump + // Each line of instructions should look like: + // + // $rel_offset: ab cd ef 00 $instruction... + let expected_len + = if cfg!(target_arch = "arm") || cfg!(target_arch = "aarch64") { + 8 + } else { + 2 + }; + + instruction + .split_whitespace() + .skip(1) + .skip_while(|s| { + s.len() == expected_len + && usize::from_str_radix(s, 16).is_ok() + }) + .skip_while(|s| *s == "lock") // skip x86-specific prefix + .map(std::string::ToString::to_string) + .collect::>() + }; + instructions.push(parts.join(" ")); + } + let function = Function { + name: symbol, + instrs: instructions + }; + assert!(functions.insert(function)); + } + functions +} diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs new file mode 100644 index 00000000000..a284697d6bc --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -0,0 +1,178 @@ +//! Runtime support needed for testing the stdarch crate. +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. +#![feature(const_str_as_bytes)] +#![feature(const_transmute)] +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] + +extern crate assert_instr_macro; +extern crate cc; +#[macro_use] +extern crate lazy_static; +extern crate rustc_demangle; +extern crate simd_test_macro; +#[macro_use] +extern crate cfg_if; + +pub use assert_instr_macro::*; +pub use simd_test_macro::*; +use std::{cmp, collections::HashSet, env, hash, str, sync::atomic::AtomicPtr}; + +// `println!` doesn't work on wasm32 right now, so shadow the compiler's `println!` +// macro with our own shim that redirects to `console.log`. +#[allow(unused)] +#[cfg(target_arch = "wasm32")] +#[macro_export] +macro_rules! println { + ($($args:tt)*) => (crate::wasm::js_console_log(&format!($($args)*))) +} + +cfg_if! 
{ + if #[cfg(target_arch = "wasm32")] { + extern crate wasm_bindgen; + extern crate console_error_panic_hook; + pub mod wasm; + use wasm::disassemble_myself; + } else { + mod disassembly; + use disassembly::disassemble_myself; + } +} + +lazy_static! { + static ref DISASSEMBLY: HashSet = disassemble_myself(); +} + +#[derive(Debug)] +struct Function { + name: String, + instrs: Vec, +} +impl Function { + fn new(n: &str) -> Self { + Self { + name: n.to_string(), + instrs: Vec::new(), + } + } +} + +impl cmp::PartialEq for Function { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +impl cmp::Eq for Function {} + +impl hash::Hash for Function { + fn hash(&self, state: &mut H) { + self.name.hash(state) + } +} + +/// Main entry point for this crate, called by the `#[assert_instr]` macro. +/// +/// This asserts that the function at `fnptr` contains the instruction +/// `expected` provided. +pub fn assert(_fnptr: usize, fnname: &str, expected: &str) { + //eprintln!("shim name: {}", fnname); + let function = &DISASSEMBLY + .get(&Function::new(fnname)) + .unwrap_or_else(|| panic!("function \"{}\" not found in the disassembly", fnname)); + //eprintln!(" function: {:?}", function); + + let mut instrs = &function.instrs[..]; + while instrs.last().map_or(false, |s| s == "nop") { + instrs = &instrs[..instrs.len() - 1]; + } + + // Look for `expected` as the first part of any instruction in this + // function, e.g., tzcntl in tzcntl %rax,%rax. + let found = instrs.iter().any(|s| s.contains(expected)); + + // Look for `call` instructions in the disassembly to detect whether + // inlining failed: all intrinsics are `#[inline(always)]`, so + // calling one intrinsic from another should not generate `call` + // instructions. + let inlining_failed = instrs.windows(2).any(|s| { + // On 32-bit x86 position independent code will call itself and be + // immediately followed by a `pop` to learn about the current address. + // Let's not take that into account when considering whether a function + // failed inlining something. + s[0].contains("call") && (!cfg!(target_arch = "x86") || s[1].contains("pop")) + }); + + let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT") + .ok() + .map_or_else( + || match expected { + // `cpuid` returns a pretty big aggregate structure, so exempt + // it from the slightly more restrictive 22 instructions below. + "cpuid" => 30, + + // Apparently, on Windows, LLVM generates a bunch of + // saves/restores of xmm registers around these intstructions, + // which exceeds the limit of 20 below. As it seems dictated by + // Windows's ABI (I believe?), we probably can't do much + // about it. + "vzeroall" | "vzeroupper" if cfg!(windows) => 30, + + // Intrinsics using `cvtpi2ps` are typically "composites" and + // in some cases exceed the limit. + "cvtpi2ps" => 25, + + // core_arch/src/acle/simd32 + "usad8" => 27, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, + + // Original limit was 20 instructions, but ARM DSP Intrinsics + // are exactly 20 instructions long. So, bump the limit to 22 + // instead of adding here a long list of exceptions. + _ => 22, + }, + |v| v.parse().unwrap(), + ); + let probably_only_one_instruction = instrs.len() < instruction_limit; + + if found && probably_only_one_instruction && !inlining_failed { + return; + } + + // Help debug by printing out the found disassembly, and then panic as we + // didn't find the instruction. 
+ println!("disassembly for {}: ", fnname,); + for (i, instr) in instrs.iter().enumerate() { + println!("\t{:2}: {}", i, instr); + } + + if !found { + panic!( + "failed to find instruction `{}` in the disassembly", + expected + ); + } else if !probably_only_one_instruction { + panic!( + "instruction found, but the disassembly contains too many \ + instructions: #instructions = {} >= {} (limit)", + instrs.len(), + instruction_limit + ); + } else if inlining_failed { + panic!( + "instruction found, but the disassembly contains `call` \ + instructions, which hint that inlining failed" + ); + } +} + +pub fn assert_skip_test_ok(name: &str) { + if env::var("STDARCH_TEST_EVERYTHING").is_err() { + return; + } + panic!("skipped test `{}` when it shouldn't be skipped", name); +} + +// See comment in `assert-instr-macro` crate for why this exists +pub static _DONT_DEDUP: AtomicPtr = AtomicPtr::new(b"".as_ptr() as *mut _); diff --git a/library/stdarch/crates/stdarch-test/src/wasm.rs b/library/stdarch/crates/stdarch-test/src/wasm.rs new file mode 100644 index 00000000000..b31051bc68f --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/wasm.rs @@ -0,0 +1,88 @@ +//! Disassembly calling function for `wasm32` targets. +use wasm_bindgen::prelude::*; + +use crate::Function; +use std::collections::HashSet; + +#[wasm_bindgen(module = "child_process")] +extern "C" { + #[wasm_bindgen(js_name = execFileSync)] + fn exec_file_sync(cmd: &str, args: &js_sys::Array, opts: &js_sys::Object) -> Buffer; +} + +#[wasm_bindgen(module = "buffer")] +extern "C" { + type Buffer; + #[wasm_bindgen(method, js_name = toString)] + fn to_string(this: &Buffer) -> String; +} + +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_namespace = require)] + fn resolve(module: &str) -> String; + #[wasm_bindgen(js_namespace = console, js_name = log)] + pub fn js_console_log(s: &str); +} + +pub(crate) fn disassemble_myself() -> HashSet { + use std::path::Path; + ::console_error_panic_hook::set_once(); + // Our wasm module in the wasm-bindgen test harness is called + // "wasm-bindgen-test_bg". When running in node this is actually a shim JS + // file. Ask node where that JS file is, and then we use that with a wasm + // extension to find the wasm file itself. + let js_shim = resolve("wasm-bindgen-test_bg"); + let js_shim = Path::new(&js_shim).with_extension("wasm"); + + // Execute `wasm2wat` synchronously, waiting for and capturing all of its + // output. Note that we pass in a custom `maxBuffer` parameter because we're + // generating a ton of output that needs to be buffered. + let args = js_sys::Array::new(); + args.push(&js_shim.display().to_string().into()); + args.push(&"--enable-simd".into()); + let opts = js_sys::Object::new(); + js_sys::Reflect::set(&opts, &"maxBuffer".into(), &(200 * 1024 * 1024).into()) + .unwrap(); + let output = exec_file_sync("wasm2wat", &args, &opts).to_string(); + + let mut ret: HashSet = HashSet::new(); + let mut lines = output.lines().map(|s| s.trim()); + while let Some(line) = lines.next() { + // If this isn't a function, we don't care about it. + if !line.starts_with("(func ") { + continue; + } + + let mut function = Function { + name: String::new(), + instrs: Vec::new(), + }; + + // Empty functions will end in `))` so there's nothing to do, otherwise + // we'll have a bunch of following lines which are instructions. + // + // Lines that have an imbalanced `)` mark the end of a function. 
+ if !line.ends_with("))") { + while let Some(line) = lines.next() { + function.instrs.push(line.to_string()); + if !line.starts_with("(") && line.ends_with(")") { + break; + } + } + } + // The second element here split on whitespace should be the name of + // the function, skipping the type/params/results + function.name = line.split_whitespace().nth(1).unwrap().to_string(); + if function.name.starts_with("$") { + function.name = function.name[1..].to_string() + } + + if !function.name.contains("stdarch_test_shim") { + continue; + } + + assert!(ret.insert(function)); + } + return ret; +} diff --git a/library/stdarch/crates/stdarch-verify/.gitattributes b/library/stdarch/crates/stdarch-verify/.gitattributes new file mode 100644 index 00000000000..621fdea6f7d --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/.gitattributes @@ -0,0 +1 @@ +*.xml binary diff --git a/library/stdarch/crates/stdarch-verify/Cargo.toml b/library/stdarch/crates/stdarch-verify/Cargo.toml new file mode 100644 index 00000000000..1414b110070 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "stdarch-verify" +version = "0.1.0" +authors = ["Alex Crichton "] +edition = "2018" + +[dependencies] +proc-macro2 = "0.4" +quote = "0.6" +syn = { version = "0.15", features = ["full"] } + +[lib] +proc-macro = true +test = false + +[dev-dependencies] +serde = { version = "1.0", features = ['derive'] } +serde-xml-rs = "0.3" +html5ever = "0.23.0" diff --git a/library/stdarch/crates/stdarch-verify/arm-intrinsics.html b/library/stdarch/crates/stdarch-verify/arm-intrinsics.html new file mode 100644 index 00000000000..ac246c6bae2 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/arm-intrinsics.html @@ -0,0 +1,93399 @@ + + + + + + + + Technologies | NEON Intrinsics Reference – Arm Developer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
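The crates added above fit together as follows: `stdarch-verify` parses this NEON reference page (presumably what the `html5ever` dev-dependency is for), while `stdarch-test`'s `assert` checks that each intrinsic's `stdarch_test_shim_*` wrapper disassembles to the expected instruction. A rough sketch of how an intrinsic definition in `core_arch` consumes that machinery — the body and the `simd_add` helper are illustrative assumptions, not taken from this patch:

// Hypothetical excerpt from core_arch, only to show how stdarch-test is used;
// the real crate's attributes and helpers may differ.
#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "neon")]
// `assert_instr` generates a `stdarch_test_shim_*` function whose disassembly
// must contain an `add` instruction; `parse` and `assert` above verify that.
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    simd_add(a, b) // assumed internal helper, shown for illustration only
}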
NEON Intrinsics

Click on the intrinsic name to display more information about the intrinsic. To search for an intrinsic, enter the name of the intrinsic in the search box. As you type, the matching intrinsics will be displayed.

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
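The `Operation` pseudocode above — repeated with different element sizes throughout this reference — walks `elements` lanes of `esize` bits and stores each lane-wise sum modulo 2^esize. A plain-Rust model of the `Vd.8B` form (eight 8-bit lanes, ignoring the shared `sub_op` path), included here only as a reading aid:

// Scalar model of ADD Vd.8B,Vn.8B,Vm.8B: eight lanes, esize = 8 bits.
fn add_v8b(operand1: [i8; 8], operand2: [i8; 8]) -> [i8; 8] {
    let mut result = [0i8; 8];
    for e in 0..8 {
        // Elem[result, e, esize] = element1 + element2, wrapping at 2^esize
        result[e] = operand1[e].wrapping_add(operand2[e]);
    }
    result
}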

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
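Unlike the integer `ADD` forms, the floating-point variants lower to `FADD` and go through `FPAdd` with the current `FPCR` state. From Rust this is reachable through the NEON intrinsics plus the run-time detection added by this patch; a hedged sketch, assuming the ACLE name `vadd_f32` for the `Vd.2S` form (the intrinsic names are not visible in this fragment) and the current `std::arch` macro path:

// Guarded use of FADD Vd.2S. float32x2_t/vadd_f32 are the ACLE names and are
// assumed here; the detection macro lived behind `stdsimd` when this landed.
#[cfg(target_arch = "aarch64")]
fn add_pairs(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    if std::arch::is_aarch64_feature_detected!("neon") {
        unsafe {
            use std::arch::aarch64::*;
            let va: float32x2_t = core::mem::transmute(a);
            let vb: float32x2_t = core::mem::transmute(b);
            core::mem::transmute(vadd_f32(va, vb)) // lowers to FADD Vd.2S
        }
    } else {
        [a[0] + b[0], a[1] + b[1]]
    }
}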

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64
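This form adds a single 64-bit element held in a `D` register and, as the block notes, exists only on A64. A minimal sketch, assuming the ACLE name `vaddd_s64` (not shown in this fragment):

// Scalar 64-bit add in a SIMD D register; should lower to ADD Dd,Dn,Dm.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn d_register_add(a: i64, b: i64) -> i64 {
    use std::arch::aarch64::vaddd_s64; // name assumed from ACLE
    vaddd_s64(a, b)
}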

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
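The long form widens as it adds: eight `i8` lanes from each source yield eight `i16` lanes, so the sum cannot wrap. In Rust terms, assuming the ACLE name `vaddl_s8`:

// Widening add: int8x8_t + int8x8_t -> int16x8_t, lowering to SADDL Vd.8H.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn widening_add(a: [i8; 8], b: [i8; 8]) -> [i16; 8] {
    use std::arch::aarch64::*;
    let wide = vaddl_s8(core::mem::transmute(a), core::mem::transmute(b));
    core::mem::transmute(wide) // int16x8_t and [i16; 8] share a layout
}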

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
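The unsigned long form is the usual way to add bytes whose sum may exceed 255, since both operands are widened to 16 bits before the addition. A sketch, again assuming the ACLE name (`vaddl_u8`):

// u8 + u8 can reach 510, so widen to u16 lanes while adding (UADDL Vd.8H).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn add_bytes_widening(a: [u8; 8], b: [u8; 8]) -> [u16; 8] {
    use std::arch::aarch64::*;
    // e.g. lanes holding 200 and 200 produce 400, which fits in the u16 lane.
    let wide: uint16x8_t = vaddl_u8(core::mem::transmute(a), core::mem::transmute(b));
    core::mem::transmute(wide)
}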

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
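The `2` suffix means the instruction reads the upper halves of two 128-bit registers, so pairing it with the non-`2` form covers all sixteen lanes of a `Vn.16B` input. A sketch under the assumption that the ACLE names `vaddl_s8`, `vaddl_high_s8` and `vget_low_s8` are available:

// Widen all 16 byte lanes of two int8x16_t vectors: SADDL handles the low
// half, SADDL2 the high half.
#[cfg(target_arch = "aarch64")]
mod widen_all {
    use std::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn widen_all_lanes(a: int8x16_t, b: int8x16_t) -> (int16x8_t, int16x8_t) {
        let low = vaddl_s8(vget_low_s8(a), vget_low_s8(b)); // lanes 0..7  (SADDL)
        let high = vaddl_high_s8(a, b);                     // lanes 8..15 (SADDL2)
        (low, high)
    }
}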

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
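The wide form differs from the long form in that the first operand is already 16-bit, which makes it natural for accumulating narrow data into a wider running total. A sketch with the `vaddw_s8` and `vdupq_n_s16` names assumed from ACLE; the accumulator lanes will still wrap for very long inputs:

// Accumulate eight i8 lanes per chunk into an i16x8 accumulator (SADDW Vd.8H).
#[cfg(target_arch = "aarch64")]
mod accumulate {
    use std::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn sum_chunks(chunks: &[[i8; 8]]) -> int16x8_t {
        let mut acc = vdupq_n_s16(0); // start every lane at zero
        for chunk in chunks {
            let narrow: int8x8_t = core::mem::transmute(*chunk);
            acc = vaddw_s8(acc, narrow); // widen-add the i8 lanes into acc
        }
        acc
    }
}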

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
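For the unsigned case the same split applies: `UADDL` widens both 8-bit operands, while `UADDW` adds 8-bit lanes into an accumulator that is already 16-bit. A small side-by-side, with the intrinsic names assumed as before:

// Same result type, differently shaped inputs.
#[cfg(target_arch = "aarch64")]
mod unsigned_widen {
    use std::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn both_forms(acc: uint16x8_t, a: uint8x8_t, b: uint8x8_t) -> (uint16x8_t, uint16x8_t) {
        (vaddl_u8(a, b),   // UADDL: widen both operands, then add
         vaddw_u8(acc, a)) // UADDW: add narrow lanes into the wide accumulator
    }
}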

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
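Halving add computes the lane-wise `(a + b) >> 1` in a double-width intermediate, so it cannot overflow, and for signed inputs the shift truncates toward negative infinity — that is what `sum<esize:1>` in the pseudocode expresses. A scalar model next to the intrinsic call (the `vhadd_s8` name is assumed):

// SHADD: lane-wise (a + b) >> 1 without intermediate overflow.
fn shadd_scalar(a: i8, b: i8) -> i8 {
    ((a as i16 + b as i16) >> 1) as i8 // matches sum<esize:1> above
}

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn shadd_vector(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    use std::arch::aarch64::*;
    core::mem::transmute(vhadd_s8(core::mem::transmute(a), core::mem::transmute(b)))
}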

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
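
A scalar sketch of the rounding variant (illustrative, not the intrinsic): the +1 before the shift is what distinguishes SRHADD/URHADD from SHADD/UHADD and matches (element1+element2+1)<esize:1> in the pseudocode.

// Signed rounding halving add on one lane: halves round upward.
fn srhadd_sketch(a: i8, b: i8) -> i8 {
    ((i16::from(a) + i16::from(b) + 1) >> 1) as i8
}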

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
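
On a single lane, the saturating behaviour can be sketched with Rust's built-in saturating arithmetic (this models SatQ only; it does not set the FPSR.QC flag that the pseudocode records on saturation):

// Signed saturating add on one lane: the sum is clamped to [-128, 127].
fn sqadd_sketch(a: i8, b: i8) -> i8 {
    a.saturating_add(b)
}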

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
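
The unsigned form saturates at the top of the element range instead; a small sketch plus a check of the clamping behaviour (again, FPSR.QC is not modelled):

// Unsigned saturating add on one lane: the sum is clamped to [0, 255].
fn uqadd_sketch(a: u8, b: u8) -> u8 {
    a.saturating_add(b)
}

#[test]
fn uqadd_sketch_saturates() {
    assert_eq!(uqadd_sketch(200, 100), 255); // 300 clamps to the u8 maximum
}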

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.8B,Vn.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64
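
Per lane, SUQADD adds an unsigned source element to a signed accumulator element and saturates the result to the signed range; a hedged scalar sketch (hypothetical helper, reading the pseudocode above with `unsigned` = FALSE for this signed-result form):

// Signed saturating accumulate of an unsigned value on one lane.
fn suqadd_sketch(acc: i8, x: u8) -> i8 {
    let sum = i16::from(acc) + i16::from(x); // exact in i16
    // Only the upper bound can actually be reached, but clamp both ends for clarity.
    sum.clamp(i16::from(i8::MIN), i16::from(i8::MAX)) as i8
}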

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.16B,Vn.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.4H,Vn.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.8H,Vn.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.2S,Vn.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.4S,Vn.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.2D,Vn.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Bd,Bn
+

Argument Preparation

a → Bd 
+b → Bn

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Hd,Hn
+

Argument Preparation

a → Hd 
+b → Hn

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Sd,Sn
+

Argument Preparation

a → Sd 
+b → Sn

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.8B,Vn.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64
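
Per lane, USQADD is the mirror image: a signed source element is added to an unsigned accumulator element and the result saturates to the unsigned range. A scalar sketch (illustrative only):

// Unsigned saturating accumulate of a signed value on one lane.
fn usqadd_sketch(acc: u8, x: i8) -> u8 {
    let sum = i16::from(acc) + i16::from(x); // exact in i16
    sum.clamp(0, i16::from(u8::MAX)) as u8   // clamp to [0, 255]
}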

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.16B,Vn.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.4H,Vn.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.8H,Vn.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.2S,Vn.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.4S,Vn.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.2D,Vn.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Bd,Bn
+

Argument Preparation

a → Bd 
+b → Bn

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, saturates each result to the unsigned integer range, and writes the resulting unsigned integer values back to the destination SIMD&FP register.

+

A64 Instruction

USQADD Hd,Hn
+

Argument Preparation

a → Hd 
+b → Hn

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, saturates each result to the unsigned integer range, and writes the resulting unsigned integer values back to the destination SIMD&FP register.

+

A64 Instruction

USQADD Sd,Sn
+

Argument Preparation

a → Sd 
+b → Sn

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, saturates each result to the unsigned integer range, and writes the resulting unsigned integer values back to the destination SIMD&FP register.

+

A64 Instruction

USQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+
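
As a reading aid for the narrowing step above, the per-lane computation can be sketched in plain Rust. This models a single 16-bit lane of the 8H-to-8B form (esize = 8); the helper name is invented for the example.

// One ADDHN lane: add the two 2*esize-bit elements modulo 2^16 and keep
// only the most significant half, as sum<2*esize-1:esize> does.
fn addhn_lane_u16(a: u16, b: u16) -> u8 {
    (a.wrapping_add(b) >> 8) as u8
}

fn main() {
    // 0x1234 + 0x0F00 = 0x2134 -> high byte 0x21
    assert_eq!(addhn_lane_u16(0x1234, 0x0F00), 0x21);
    // The addition wraps: 0xFFFF + 0x0002 = 0x0001 -> high byte 0x00
    assert_eq!(addhn_lane_u16(0xFFFF, 0x0002), 0x00);
}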

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+
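
The "2" form differs from ADDHN only in where the narrowed vector lands: it fills the upper half of the destination while the lower half comes from the narrow vector passed as the first argument (the Vpart[d, part] write). A rough Rust sketch, with invented names and plain arrays standing in for SIMD registers:

// Illustrative model of ADDHN2 for the 16B destination: keep the existing
// low half `r_low` and write the narrowed sums into the high half.
fn addhn2_u16x8(r_low: [u8; 8], a: [u16; 8], b: [u16; 8]) -> [u8; 16] {
    let mut out = [0u8; 16];
    out[..8].copy_from_slice(&r_low);
    for i in 0..8 {
        out[8 + i] = (a[i].wrapping_add(b[i]) >> 8) as u8;
    }
    out
}

fn main() {
    let out = addhn2_u16x8([0; 8], [0x1234; 8], [0x0F00; 8]);
    assert_eq!(out[0], 0x00); // low half preserved
    assert_eq!(out[8], 0x21); // high half holds the narrowed sums
}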

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+
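
The rounding variant only adds a rounding constant of 1 << (esize - 1) before the high half is taken. A minimal per-lane sketch in Rust for 16-bit inputs (helper name invented for the example):

// One RADDHN lane: as ADDHN, plus a rounding constant, still modulo 2^16.
fn raddhn_lane_u16(a: u16, b: u16) -> u8 {
    const ROUND: u16 = 1 << 7; // esize = 8
    (a.wrapping_add(b).wrapping_add(ROUND) >> 8) as u8
}

fn main() {
    // (0x1280 + 0x0000 + 0x80) >> 8 = 0x13: rounds up
    assert_eq!(raddhn_lane_u16(0x1280, 0x0000), 0x13);
    // (0x127F + 0x0000 + 0x80) >> 8 = 0x12: rounds down
    assert_eq!(raddhn_lane_u16(0x127F, 0x0000), 0x12);
}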

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+
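
For the non-polynomial path of the pseudocode, the product is simply truncated to the element size, which in Rust is a wrapping multiply. A small illustrative sketch for 8-bit lanes (name invented for the example):

// One MUL lane for 8-bit elements: (UInt(e1) * UInt(e2))<7:0> gives the same
// result as a wrapping multiply, for both signed and unsigned inputs.
fn mul_lane_u8(a: u8, b: u8) -> u8 {
    a.wrapping_mul(b)
}

fn main() {
    assert_eq!(mul_lane_u8(7, 9), 63);
    // 0x40 * 0x04 = 0x100, but only the low 8 bits (0x00) are kept.
    assert_eq!(mul_lane_u8(0x40, 0x04), 0x00);
}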

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+
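
For the floating-point form there is no truncation step; each lane is an ordinary IEEE multiply. A tiny Rust sketch of the 2S arrangement, ignoring the FPCR-controlled rounding-mode and exception details that FPMul takes into account:

// Element-wise multiply modelling FMUL Vd.2S,Vn.2S,Vm.2S.
fn fmul_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [a[0] * b[0], a[1] * b[1]]
}

fn main() {
    assert_eq!(fmul_f32x2([1.5, -2.0], [2.0, 4.0]), [3.0, -8.0]);
}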

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

PMUL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+
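
The polynomial path replaces ordinary addition with XOR, i.e. a carry-less multiply over GF(2), keeping only the low esize bits. An illustrative per-lane sketch in Rust (helper name invented for the example):

// One PMUL lane: carry-less (polynomial) 8x8-bit multiply, truncated to
// 8 bits like PolynomialMult(element1, element2)<esize-1:0>.
fn pmul_lane_u8(a: u8, b: u8) -> u8 {
    let mut acc: u16 = 0;
    for i in 0..8 {
        if (b >> i) & 1 == 1 {
            acc ^= (a as u16) << i; // XOR instead of a carrying add
        }
    }
    acc as u8
}

fn main() {
    // (x + 1) * (x + 1) over GF(2) = x^2 + 1, i.e. 0x03 * 0x03 = 0x05
    assert_eq!(pmul_lane_u8(0x03, 0x03), 0x05);
}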

Supported architectures

v7/A32/A64

Description

Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

PMUL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+
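
What distinguishes FMULX from FMUL is the FPMulX helper: it behaves like an ordinary multiply except that infinity times zero produces 2.0 with the sign of the product instead of the default NaN. A rough Rust model of that rule for f32, glossing over the exact NaN-propagation and FPCR behaviour of the real FPMulX:

// Sketch of the FPMulX special case used by FMULX.
fn fpmulx_f32(a: f32, b: f32) -> f32 {
    let zero_times_inf =
        (a == 0.0 && b.is_infinite()) || (a.is_infinite() && b == 0.0);
    if zero_times_inf {
        // Result is 2.0 with the sign the product would have had.
        if a.is_sign_negative() ^ b.is_sign_negative() { -2.0 } else { 2.0 }
    } else {
        a * b
    }
}

fn main() {
    assert_eq!(fpmulx_f32(0.0, f32::INFINITY), 2.0);
    assert_eq!(fpmulx_f32(-0.0, f32::INFINITY), -2.0);
    assert_eq!(fpmulx_f32(3.0, 2.0), 6.0);
}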

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+
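
In the by-element form a single lane of the second register is selected once (Elem[operand2, index, esize]) and multiplied against every lane of the first. A minimal Rust sketch of that indexing for the 2S arrangement; plain * stands in for the FPMulX rule sketched earlier, and the names are invented for the example:

// Multiply every lane of `a` by the chosen lane of `v`.
fn mulx_by_lane_f32x2(a: [f32; 2], v: [f32; 2], lane: usize) -> [f32; 2] {
    assert!(lane < 2); // 0 <= lane <= 1 for the .2S form
    let e2 = v[lane];  // Elem[operand2, index, esize]
    [a[0] * e2, a[1] * e2]
}

fn main() {
    assert_eq!(mulx_by_lane_f32x2([1.0, 2.0], [10.0, 20.0], 1), [20.0, 40.0]);
}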

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
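
For the FMULX forms listed above, a minimal Rust sketch follows. It uses the plain two-operand vector form rather than a by-element form, and it assumes that core::arch::aarch64 exposes the vmulx* family (vmulxq_f64 here; the _lane/_laneq by-element forms are analogous) — an illustrative assumption, not something stated in this patch. FMULX differs from an ordinary multiply only in that zero times infinity returns 2.0 with the appropriate sign instead of NaN.

// Minimal sketch (not the crate's implementation) of FMULX on two f64 lanes.
// Assumes vmulxq_f64 is available; NEON is mandatory on AArch64 targets.
#[cfg(target_arch = "aarch64")]
fn fmulx_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f64(0.0);
        let b = vdupq_n_f64(f64::INFINITY);
        let r = vmulxq_f64(a, b); // FPMulX: 0.0 * inf yields +2.0, not NaN
        let mut out = [0.0f64; 2];
        vst1q_f64(out.as_mut_ptr(), r);
        assert_eq!(out, [2.0, 2.0]);
    }
}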

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
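
The FDIV entries above divide lane-wise. A minimal sketch, assuming core::arch::aarch64 provides vdivq_f32 (an illustrative assumption):

// Minimal sketch: element-wise f32 division, as in FDIV Vd.4S,Vn.4S,Vm.4S.
#[cfg(target_arch = "aarch64")]
fn fdiv_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(6.0);
        let b = vdupq_n_f32(3.0);
        let r = vdivq_f32(a, b); // each lane: 6.0 / 3.0 = 2.0
        let mut out = [0.0f32; 4];
        vst1q_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [2.0; 4]);
    }
}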

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
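
The integer MLA forms above compute a[i] + b[i]*c[i] per lane, wrapping to the element size. A minimal sketch, assuming core::arch::aarch64 exposes vmlaq_u8 (an illustrative name, not taken from this patch):

// Minimal sketch: MLA Vd.16B,Vn.16B,Vm.16B — per-lane multiply-accumulate.
#[cfg(target_arch = "aarch64")]
fn mla_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u8(10);
        let b = vdupq_n_u8(3);
        let c = vdupq_n_u8(4);
        let r = vmlaq_u8(acc, b, c); // 10 + 3*4 = 22 in every lane
        let mut out = [0u8; 16];
        vst1q_u8(out.as_mut_ptr(), r);
        assert_eq!(out, [22u8; 16]);
    }
}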

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64
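
The formula-only entries above describe the same a[i] + b[i]*c[i] pattern without a single dedicated instruction, most likely for the floating-point lane widths. A minimal sketch, assuming vmlaq_f32 is available in core::arch::aarch64 (an illustrative assumption):

// Minimal sketch: per-lane a[i] + b[i]*c[i] on f32.
#[cfg(target_arch = "aarch64")]
fn mla_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(1.0);
        let b = vdupq_n_f32(2.0);
        let c = vdupq_n_f32(3.0);
        let r = vmlaq_f32(a, b, c); // 1.0 + 2.0*3.0 = 7.0 in every lane
        let mut out = [0.0f32; 4];
        vst1q_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [7.0; 4]);
    }
}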

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
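
SMLAL widens: the products and the accumulator lanes are twice the width of the multiplied elements. A minimal sketch, assuming vmlal_s16 is exposed by core::arch::aarch64 (an illustrative name):

// Minimal sketch: SMLAL Vd.4S,Vn.4H,Vm.4H — signed widening multiply-accumulate.
#[cfg(target_arch = "aarch64")]
fn smlal_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(100);
        let b = vdup_n_s16(-300);
        let c = vdup_n_s16(300);
        // -300 * 300 = -90_000 does not fit in i16, but the i32 accumulator holds it.
        let r = vmlal_s16(acc, b, c); // 100 + (-90_000) = -89_900 per lane
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        assert_eq!(out, [-89_900; 4]);
    }
}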

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
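
UMLAL is the unsigned counterpart: widening multiply, then accumulate. A minimal sketch, assuming vmlal_u16 is available (an illustrative name):

// Minimal sketch: UMLAL Vd.4S,Vn.4H,Vm.4H — unsigned widening multiply-accumulate.
#[cfg(target_arch = "aarch64")]
fn umlal_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u32(1);
        let b = vdup_n_u16(60_000);
        let c = vdup_n_u16(2);
        let r = vmlal_u16(acc, b, c); // 1 + 60_000*2 = 120_001, wider than u16
        let mut out = [0u32; 4];
        vst1q_u32(out.as_mut_ptr(), r);
        assert_eq!(out, [120_001; 4]);
    }
}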

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
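
The SMLAL2 forms read the upper halves of the source vectors instead of the lower halves. A minimal sketch, assuming vmlal_high_s16 exists in core::arch::aarch64 (an illustrative assumption):

// Minimal sketch: SMLAL2 Vd.4S,Vn.8H,Vm.8H — widening accumulate over the upper lanes.
#[cfg(target_arch = "aarch64")]
fn smlal2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(0);
        let b = vdupq_n_s16(7);
        let c = vdupq_n_s16(9);
        let r = vmlal_high_s16(acc, b, c); // only lanes 4..7 of b and c are multiplied
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        assert_eq!(out, [63; 4]); // 0 + 7*9 in every accumulator lane
    }
}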

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
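
UMLAL2 does the same over the upper halves with unsigned arithmetic. A minimal sketch, assuming vmlal_high_u16 is available (an illustrative name):

// Minimal sketch: UMLAL2 Vd.4S,Vn.8H,Vm.8H — unsigned widening accumulate, upper lanes.
#[cfg(target_arch = "aarch64")]
fn umlal2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u32(5);
        let b = vdupq_n_u16(1_000);
        let c = vdupq_n_u16(1_000);
        let r = vmlal_high_u16(acc, b, c); // 5 + 1_000_000 per lane
        let mut out = [0u32; 4];
        vst1q_u32(out.as_mut_ptr(), r);
        assert_eq!(out, [1_000_005; 4]);
    }
}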

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
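
MLS mirrors MLA with a subtraction: each lane computes a[i] - b[i]*c[i], wrapping to the element size. A minimal sketch, assuming vmlsq_u8 is exposed (an illustrative name):

// Minimal sketch: MLS Vd.16B,Vn.16B,Vm.16B — per-lane multiply-subtract.
#[cfg(target_arch = "aarch64")]
fn mls_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u8(50);
        let b = vdupq_n_u8(3);
        let c = vdupq_n_u8(4);
        let r = vmlsq_u8(acc, b, c); // 50 - 3*4 = 38 in every lane
        let mut out = [0u8; 16];
        vst1q_u8(out.as_mut_ptr(), r);
        assert_eq!(out, [38u8; 16]);
    }
}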

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64
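
As with the additive case, these formula-only entries most likely cover the floating-point multiply-subtract variants. A minimal sketch, assuming vmlsq_f32 is available in core::arch::aarch64 (an illustrative assumption):

// Minimal sketch: per-lane a[i] - b[i]*c[i] on f32.
#[cfg(target_arch = "aarch64")]
fn mls_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(10.0);
        let b = vdupq_n_f32(2.0);
        let c = vdupq_n_f32(3.0);
        let r = vmlsq_f32(a, b, c); // 10.0 - 2.0*3.0 = 4.0 in every lane
        let mut out = [0.0f32; 4];
        vst1q_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [4.0; 4]);
    }
}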

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
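
SMLSL is the widening multiply-subtract: signed products twice the element width, subtracted from the accumulator. A minimal sketch, assuming vmlsl_s16 is available (an illustrative name):

// Minimal sketch: SMLSL Vd.4S,Vn.4H,Vm.4H — signed widening multiply-subtract.
#[cfg(target_arch = "aarch64")]
fn smlsl_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(1_000_000);
        let b = vdup_n_s16(500);
        let c = vdup_n_s16(400);
        let r = vmlsl_s16(acc, b, c); // 1_000_000 - 500*400 = 800_000 per lane
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        assert_eq!(out, [800_000; 4]);
    }
}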

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
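
A hedged scalar sketch of the SMLSL2 form in Rust (names are illustrative): the "2" suffix means the upper eight elements of the 16-byte sources are read, and the widening products are signed:

// Scalar sketch of SMLSL2 Vd.8H,Vn.16B,Vm.16B: upper half, signed widening.
fn smlsl2_i8(acc: &mut [i16; 8], b: [i8; 16], c: [i8; 16]) {
    for e in 0..8 {
        let product = i16::from(b[e + 8]) * i16::from(c[e + 8]);
        acc[e] = acc[e].wrapping_sub(product);
    }
}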

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
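
For comparison, a minimal scalar sketch of the UMLSL2 form (illustrative Rust): identical to UMLSL except that the upper eight bytes of each source are used:

// Scalar sketch of UMLSL2 Vd.8H,Vn.16B,Vm.16B: upper half, unsigned widening.
fn umlsl2_u8(acc: &mut [u16; 8], b: [u8; 16], c: [u8; 16]) {
    for e in 0..8 {
        acc[e] = acc[e].wrapping_sub(u16::from(b[e + 8]) * u16::from(c[e + 8]));
    }
}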

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
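
The FPMulAdd step above is a single fused multiply-add per lane. A minimal scalar sketch in Rust (illustrative only; lane count fixed to the .2S form), using f32::mul_add as the fused operation:

// Scalar sketch of FMLA Vd.2S,Vn.2S,Vm.2S: d[e] = d[e] + n[e]*m[e] with one rounding.
fn fmla_f32(d: &mut [f32; 2], n: [f32; 2], m: [f32; 2]) {
    for e in 0..2 {
        d[e] = n[e].mul_add(m[e], d[e]);
    }
}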

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMADD Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+c → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
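
As a scalar reference (illustrative Rust, not crate code), FMADD is one fused multiply-add on 64-bit floats:

// Scalar sketch of FMADD Dd,Dn,Dm,Da: Dd = Da + Dn*Dm with a single rounding.
fn fmadd_f64(n: f64, m: f64, a: f64) -> f64 {
    n.mul_add(m, a)
}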

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+c → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
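
A hedged scalar sketch of the by-lane form (illustrative Rust): one element of the v operand, selected by lane, is multiplied into every lane of n before the fused accumulate:

// Scalar sketch of FMLA Vd.2S,Vn.2S,Vm.S[lane]: broadcast v[lane], then fuse.
fn fmla_lane_f32(d: &mut [f32; 2], n: [f32; 2], v: [f32; 2], lane: usize) {
    assert!(lane < 2); // 0 <= lane <= 1 for the .2S vector
    for e in 0..2 {
        d[e] = n[e].mul_add(v[lane], d[e]);
    }
}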

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
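
A minimal scalar sketch of FMLS in Rust (illustrative only): the first factor is negated before the fused accumulate, so each lane computes d - n*m with one rounding:

// Scalar sketch of FMLS Vd.2S,Vn.2S,Vm.2S: d[e] = d[e] + (-n[e])*m[e].
fn fmls_f32(d: &mut [f32; 2], n: [f32; 2], m: [f32; 2]) {
    for e in 0..2 {
        d[e] = (-n[e]).mul_add(m[e], d[e]);
    }
}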

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMSUB Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+c → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+operand1 = FPNeg(operand1);
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
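
Scalar reference for FMSUB (illustrative Rust): the negation happens before the fused multiply-add, so the product is never rounded on its own:

// Scalar sketch of FMSUB Dd,Dn,Dm,Da: Dd = Da - Dn*Dm, fused.
fn fmsub_f64(n: f64, m: f64, a: f64) -> f64 {
    (-n).mul_add(m, a)
}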

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+c → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
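
A hedged scalar model of SQDMULH in Rust (illustrative; the FPSR.QC flag is not modelled): double the signed product, take the high 16 bits, and saturate. Only the -32768 * -32768 case actually needs the saturation step:

// Scalar sketch of SQDMULH Vd.4H,Vn.4H,Vm.4H.
fn sqdmulh_i16(a: [i16; 4], b: [i16; 4]) -> [i16; 4] {
    let mut r = [0i16; 4];
    for e in 0..4 {
        let product = 2 * i64::from(a[e]) * i64::from(b[e]); // no round_const here
        let high = product >> 16; // drop the low half
        r[e] = high.clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16;
    }
    r
}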

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
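
The rounding variant differs from SQDMULH only in the round_const term of the pseudocode. A hedged scalar sketch in Rust (illustrative; QC flag not modelled):

// Scalar sketch of SQRDMULH Vd.4H,Vn.4H,Vm.4H: add 1 << (esize-1) before taking the high half.
fn sqrdmulh_i16(a: [i16; 4], b: [i16; 4]) -> [i16; 4] {
    let mut r = [0i16; 4];
    for e in 0..4 {
        let product = 2 * i64::from(a[e]) * i64::from(b[e]) + (1_i64 << 15);
        r[e] = (product >> 16).clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16;
    }
    r
}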

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
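
A hedged scalar model of SQDMLAL in Rust (illustrative; the QC flag raised by either saturation step is not modelled): the doubled 16x16 product saturates to 32 bits, and the accumulation into the 32-bit destination lane saturates as well:

// Scalar sketch of SQDMLAL Vd.4S,Vn.4H,Vm.4H.
fn sqdmlal_i16(acc: &mut [i32; 4], b: [i16; 4], c: [i16; 4]) {
    for e in 0..4 {
        let doubled = 2 * i64::from(b[e]) * i64::from(c[e]);
        let product = doubled.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32;
        acc[e] = acc[e].saturating_add(product);
    }
}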

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Sd,Hn,Hm
+

Argument Preparation

a → Sd 
+b → Hn
+c → Hm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Dd,Sn,Sm
+

Argument Preparation

a → Dd 
+b → Sn
+c → Sm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
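
A rough Rust sketch of one SQDMLAL/SQDMLAL2 lane as described above (i16 source lanes, i32 accumulator assumed; the function name is illustrative, not a stdarch intrinsic):

// Saturating doubling multiply, then saturating accumulate, mirroring the
// Operation pseudocode: product = SatQ(2*a*b), result = SatQ(acc + product).
// FPSR.QC (the sticky saturation flag) is not modelled; the sketch just clamps.
fn sqdmlal_lane(acc: i32, a: i16, b: i16) -> i32 {
    let product = (2 * a as i64 * b as i64)
        .clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    acc.saturating_add(product)
}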

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Sd,Hn,Hm
+

Argument Preparation

a → Sd 
+b → Hn
+c → Hm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Dd,Sn,Sm
+

Argument Preparation

a → Dd 
+b → Sn
+c → Sm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
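
The SQDMLSL/SQDMLSL2 forms differ from SQDMLAL only in the accumulation step; a matching Rust sketch (illustrative name, i16 lanes assumed):

// product = SatQ(2*a*b), result = SatQ(acc - product).
fn sqdmlsl_lane(acc: i32, a: i16, b: i16) -> i32 {
    let product = (2 * a as i64 * b as i64)
        .clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    acc.saturating_sub(product)
}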

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
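
A minimal Rust sketch of one SMULL lane as described above (i16 sources widened to i32; illustrative, not the stdarch intrinsic):

// Widening signed multiply: the i32 product of two i16 values cannot overflow.
fn smull_lane(a: i16, b: i16) -> i32 {
    a as i32 * b as i32
}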

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
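
The unsigned counterpart, sketched the same way (u16 sources widened to u32; illustrative name):

// Widening unsigned multiply: the u32 product of two u16 values cannot overflow.
fn umull_lane(a: u16, b: u16) -> u32 {
    a as u32 * b as u32
}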

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
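
PolynomialMult in the Operation above is a carry-less (GF(2)) multiply; a small Rust sketch of one 8-bit lane (illustrative name):

// XOR-accumulate shifted copies of `a` for each set bit of `b`,
// i.e. multiplication of polynomials over GF(2).
fn pmull_lane(a: u8, b: u8) -> u16 {
    let mut acc = 0u16;
    for i in 0..8 {
        if (b >> i) & 1 == 1 {
            acc ^= (a as u16) << i;
        }
    }
    acc
}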

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
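
The "2" variants read the upper half of each 128-bit source; a Rust sketch of SMULL2 on i8 lanes (illustrative, fixed lane counts assumed):

fn smull2(a: [i8; 16], b: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for e in 0..8 {
        // Vpart[n, part] with part = 1 selects lanes 8..15.
        out[e] = a[8 + e] as i16 * b[8 + e] as i16;
    }
    out
}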

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Sd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Dd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
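
SQDMULL/SQDMULL2 are the accumulate-free form of the doubling multiply; one lane in Rust (illustrative name, i16 lanes assumed):

// product = SatQ(2*a*b); only i16::MIN * i16::MIN actually saturates here.
fn sqdmull_lane(a: i16, b: i16) -> i32 {
    (2 * a as i64 * b as i64)
        .clamp(i32::MIN as i64, i32::MAX as i64) as i32
}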

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
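
SUB is a plain modular (wrapping) element-wise subtraction; a Rust sketch over four i32 lanes (illustrative lane count):

fn sub_lanes(a: [i32; 4], b: [i32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    for e in 0..4 {
        out[e] = a[e].wrapping_sub(b[e]); // no saturation, wraps on overflow
    }
    out
}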

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64
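
FSUB performs an IEEE 754 subtraction per lane under the rounding mode held in FPCR; a Rust sketch over f32 lanes (illustrative; Rust's `-` uses the default round-to-nearest-even mode):

fn fsub_lanes(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let mut out = [0.0f32; 4];
    for e in 0..4 {
        out[e] = a[e] - b[e];
    }
    out
}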

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
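
SSUBL widens before subtracting, so the difference never wraps; one lane in Rust (illustrative, i16 sources widened to i32):

fn ssubl_lane(a: i16, b: i16) -> i32 {
    a as i32 - b as i32
}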

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
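
For USUBL the operands are unsigned but the difference may be negative; the Operation keeps the low 2*esize bits, which corresponds to a wrapping subtraction in the widened type. A Rust sketch for u8 sources (illustrative name):

fn usubl_lane(a: u8, b: u8) -> u16 {
    (a as u16).wrapping_sub(b as u16) // difference taken modulo 2^16
}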

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
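
A hedged usage sketch for the unsigned high form, assuming it is exposed as `vsubl_high_u8` as in ACLE.

#[cfg(target_arch = "aarch64")]
fn usubl2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x16_t = core::mem::transmute([0u8, 0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 200, 200, 200, 200]);
        let b: uint8x16_t = core::mem::transmute([50u8; 16]);
        // The upper eight u8 lanes are widened to u16 and subtracted lane-wise.
        let out: [u16; 8] = core::mem::transmute(vsubl_high_u8(a, b));
        assert_eq!(out, [150u16; 8]);
    }
}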

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
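
For the wide form, only the second operand is narrow; a small sketch (not from the patch) using `vsubw_s8`, which is assumed to map to this encoding:

#[cfg(target_arch = "aarch64")]
fn ssubw_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // The narrow i8 operand is sign-extended to i16 before the subtraction.
        let a: int16x8_t = core::mem::transmute([1000i16; 8]);
        let b: int8x8_t = core::mem::transmute([-100i8; 8]);
        let out: [i16; 8] = core::mem::transmute(vsubw_s8(a, b));
        assert_eq!(out, [1100i16; 8]);
    }
}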

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
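
The unsigned counterpart works the same way; an illustrative sketch using `vsubw_u8` (assumed to map to this encoding):

#[cfg(target_arch = "aarch64")]
fn usubw_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // The u8 operand is zero-extended to u16, then subtracted from the wide operand.
        let a: uint16x8_t = core::mem::transmute([1000u16; 8]);
        let b: uint8x8_t = core::mem::transmute([255u8; 8]);
        let out: [u16; 8] = core::mem::transmute(vsubw_u8(a, b));
        assert_eq!(out, [745u16; 8]);
    }
}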

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
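
The wide high form takes the narrow lanes from the upper half of a 128-bit register; a hedged sketch assuming the intrinsic is `vsubw_high_s8`:

#[cfg(target_arch = "aarch64")]
fn ssubw2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int16x8_t = core::mem::transmute([0i16; 8]);
        // Only the upper eight i8 lanes of `b` are used; they are sign-extended first.
        let b: int8x16_t = core::mem::transmute([0i8, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128, -128, -128, -128, -128]);
        let out: [i16; 8] = core::mem::transmute(vsubw_high_s8(a, b));
        assert_eq!(out, [128i16; 8]); // 0 - (-128) = 128, representable only after widening
    }
}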

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
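
A matching sketch for the unsigned wide high form, assuming it is exposed as `vsubw_high_u8`:

#[cfg(target_arch = "aarch64")]
fn usubw2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint16x8_t = core::mem::transmute([300u16; 8]);
        // Only the upper eight u8 lanes of `b` take part, zero-extended to u16.
        let b: uint8x16_t = core::mem::transmute([0u8, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255]);
        let out: [u16; 8] = core::mem::transmute(vsubw_high_u8(a, b));
        assert_eq!(out, [45u16; 8]);
    }
}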

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
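
Because the difference is formed at full precision and only then halved, no intermediate overflow can occur; an illustrative sketch (not from the patch) using `vhsub_s8`:

#[cfg(target_arch = "aarch64")]
fn shsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // 7 - (-8) = 15 is computed exactly, then halved to 7 in every lane.
        let a: int8x8_t = core::mem::transmute([7i8; 8]);
        let b: int8x8_t = core::mem::transmute([-8i8; 8]);
        let out: [i8; 8] = core::mem::transmute(vhsub_s8(a, b));
        assert_eq!(out, [7i8; 8]);
    }
}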

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
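
A short sketch of the unsigned halving form via `vhsub_u8` (assumed mapping, not from the patch):

#[cfg(target_arch = "aarch64")]
fn uhsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x8_t = core::mem::transmute([255u8; 8]);
        let b: uint8x8_t = core::mem::transmute([1u8; 8]);
        // (255 - 1) >> 1 = 127 in every lane.
        let out: [u8; 8] = core::mem::transmute(vhsub_u8(a, b));
        assert_eq!(out, [127u8; 8]);
    }
}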

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
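
The point of the saturating form is that out-of-range results clamp instead of wrapping; a sketch assuming the intrinsic `vqsub_s8` maps to this encoding:

#[cfg(target_arch = "aarch64")]
fn sqsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int8x8_t = core::mem::transmute([-128i8, 100, 0, 0, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([1i8, -100, 0, 0, 0, 0, 0, 0]);
        // Lane 0 would underflow and lane 1 would overflow; both clamp to the i8 limits.
        let out: [i8; 8] = core::mem::transmute(vqsub_s8(a, b));
        assert_eq!(out, [-128i8, 127, 0, 0, 0, 0, 0, 0]);
    }
}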

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
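
The unsigned saturating form clamps at zero rather than wrapping around; an illustrative sketch via `vqsub_u8` (assumed mapping):

#[cfg(target_arch = "aarch64")]
fn uqsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x8_t = core::mem::transmute([1u8, 200, 0, 0, 0, 0, 0, 0]);
        let b: uint8x8_t = core::mem::transmute([2u8, 100, 0, 0, 0, 0, 0, 0]);
        // 1 - 2 clamps to 0 instead of wrapping to 255.
        let out: [u8; 8] = core::mem::transmute(vqsub_u8(a, b));
        assert_eq!(out, [0u8, 100, 0, 0, 0, 0, 0, 0]);
    }
}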

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
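
Purely illustrative and more of an assumption than the vector forms above: ACLE names the scalar byte form `vqsubb_s8`, and if the crate exposes it under that name, a use could look like this.

#[cfg(target_arch = "aarch64")]
fn sqsub_scalar_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar saturating subtract on a single byte value (assumed intrinsic name).
        assert_eq!(vqsubb_s8(-128, 1), -128); // clamps at the i8 minimum
        assert_eq!(vqsubb_s8(100, -50), 127); // clamps at the i8 maximum
    }
}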

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
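
The narrowing form keeps only the most significant half of each lane-wise difference; a sketch (not from the patch) using `vsubhn_s16`, which is assumed to map to this encoding:

#[cfg(target_arch = "aarch64")]
fn subhn_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int16x8_t = core::mem::transmute([0x1234i16; 8]);
        let b: int16x8_t = core::mem::transmute([0x0034i16; 8]);
        // The difference is 0x1200 per lane; only the high byte (0x12) is kept.
        let out: [i8; 8] = core::mem::transmute(vsubhn_s16(a, b));
        assert_eq!(out, [0x12i8; 8]);
    }
}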

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64
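
The SUBHN2 form writes into the upper half of the destination while the lower half is supplied by an existing narrow vector, which is why the argument preparation above lists r alongside a and b. A minimal sketch, assuming std::arch::aarch64 exposes vsubhn_high_s16:

// Minimal sketch: SUBHN2 Vd.16B,Vn.8H,Vm.8H via vsubhn_high_s16 (assumed available
// in std::arch::aarch64). r fills lanes 0..7, the narrowed differences fill lanes 8..15.
#[cfg(target_arch = "aarch64")]
unsafe fn subhn2_demo() -> [i8; 16] {
    use std::arch::aarch64::*;
    let r = vdup_n_s8(7);        // existing low half of the result
    let a = vdupq_n_s16(0x0200);
    let b = vdupq_n_s16(0x0100); // 0x0200 - 0x0100 = 0x0100, high byte 0x01
    core::mem::transmute(vsubhn_high_s16(r, a, b)) // [7; 8] then [1; 8]
}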

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
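
The rounding variant adds the round_const from the pseudocode, 1 << (esize - 1), before the high half is taken. A minimal sketch contrasting it with the truncating form, assuming std::arch::aarch64 exposes vsubhn_s16 and vrsubhn_s16:

// Minimal sketch: RSUBHN Vd.8B,Vn.8H,Vm.8H via vrsubhn_s16, next to the truncating
// SUBHN form (intrinsic availability in std::arch::aarch64 is assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn rsubhn_demo() {
    use std::arch::aarch64::*;
    let a = vdupq_n_s16(0x0180);
    let b = vdupq_n_s16(0);
    let truncated: [i8; 8] = core::mem::transmute(vsubhn_s16(a, b));
    let rounded: [i8; 8] = core::mem::transmute(vrsubhn_s16(a, b));
    assert_eq!(truncated, [1; 8]); // 0x0180 >> 8 == 1
    assert_eq!(rounded, [2; 8]);   // (0x0180 + 0x80) >> 8 == 2
}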

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64
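
As with SUBHN2, the RSUBHN2 form keeps an existing narrow vector in the lower half of the destination and writes the rounded high-half differences to the upper half. A minimal sketch, assuming std::arch::aarch64 exposes vrsubhn_high_s16:

// Minimal sketch: RSUBHN2 Vd.16B,Vn.8H,Vm.8H via vrsubhn_high_s16 (assumed available
// in std::arch::aarch64).
#[cfg(target_arch = "aarch64")]
unsafe fn rsubhn2_demo() -> [i8; 16] {
    use std::arch::aarch64::*;
    let r = vdup_n_s8(0);        // lanes 0..7 of the result
    let a = vdupq_n_s16(0x01C0); // 0x01C0 + 0x80 = 0x0240, high byte 0x02
    let b = vdupq_n_s16(0);
    core::mem::transmute(vrsubhn_high_s16(r, a, b)) // [0; 8] then [2; 8]
}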

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
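
In Rust this two-register byte compare corresponds to the vceq_* family, for example vceq_s8, which returns an unsigned mask vector. A minimal sketch, assuming std::arch::aarch64 exposes vceq_s8 and that the run-time detection macro is reachable as std::arch::is_aarch64_feature_detected (its exact path and stability gate differ between toolchains); the detection call is not strictly required on AArch64 targets, where Advanced SIMD is part of the baseline, but it shows the intended usage pattern:

// Minimal sketch: CMEQ Vd.8B,Vn.8B,Vm.8B via vceq_s8 (intrinsic and macro availability
// are assumptions).
#[cfg(target_arch = "aarch64")]
fn cmeq_demo() {
    if std::arch::is_aarch64_feature_detected!("neon") {
        use std::arch::aarch64::*;
        unsafe {
            let a = vdup_n_s8(5);
            let b = vdup_n_s8(6);
            let equal: [u8; 8] = core::mem::transmute(vceq_s8(a, a));
            let unequal: [u8; 8] = core::mem::transmute(vceq_s8(a, b));
            assert_eq!(equal, [0xFF; 8]);   // equal lanes -> all ones
            assert_eq!(unequal, [0x00; 8]); // unequal lanes -> all zeros
        }
    }
}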

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
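
For the floating-point compare the mask semantics are the same, but FPCompareEQ means a NaN lane never compares equal, not even to itself. A minimal sketch, assuming std::arch::aarch64 exposes vceq_f32:

// Minimal sketch: FCMEQ Vd.2S,Vn.2S,Vm.2S via vceq_f32 (assumed available in
// std::arch::aarch64).
#[cfg(target_arch = "aarch64")]
unsafe fn fcmeq_demo() {
    use std::arch::aarch64::*;
    let a: float32x2_t = core::mem::transmute([1.0f32, f32::NAN]);
    let mask: [u32; 2] = core::mem::transmute(vceq_f32(a, a));
    assert_eq!(mask, [u32::MAX, 0]); // 1.0 == 1.0; NaN is never equal to itself
}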

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
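
The Dd,Dn,Dm encoding is the single 64-bit element form. In ACLE it surfaces both as vceq_s64 on int64x1_t values and as the scalar vceqd_s64; whether a given Rust toolchain exposes these in std::arch::aarch64 is an assumption here. A minimal sketch using the scalar spelling:

// Minimal sketch: CMEQ Dd,Dn,Dm via vceqd_s64 (availability assumed). The whole
// 64-bit result acts as a mask, all ones or all zeros.
#[cfg(target_arch = "aarch64")]
unsafe fn cmeq_scalar_demo() {
    use std::arch::aarch64::*;
    assert_eq!(vceqd_s64(42, 42), u64::MAX);
    assert_eq!(vceqd_s64(42, 43), 0);
}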

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
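
A minimal sketch of the 2D floating-point form, assuming std::arch::aarch64 exposes vceqq_f64 and vdupq_n_f64:

// Minimal sketch: FCMEQ Vd.2D,Vn.2D,Vm.2D via vceqq_f64 (availability assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn fcmeq_f64_demo() {
    use std::arch::aarch64::*;
    let a = vdupq_n_f64(2.5);
    let b = vdupq_n_f64(2.5);
    let mask: [u64; 2] = core::mem::transmute(vceqq_f64(a, b));
    assert_eq!(mask, [u64::MAX; 2]); // both lanes compare equal
}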

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
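
The Sd,Sn,Sm encoding compares a single f32 and, like the vector forms, produces a mask rather than a boolean. A minimal sketch, assuming the ACLE scalar intrinsic vceqs_f32 is exposed by std::arch::aarch64:

// Minimal sketch: FCMEQ Sd,Sn,Sm via vceqs_f32 (availability assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn fcmeq_scalar_demo() {
    use std::arch::aarch64::*;
    assert_eq!(vceqs_f32(1.5, 1.5), u32::MAX);
    assert_eq!(vceqs_f32(1.5, f32::NAN), 0); // NaN never compares equal
}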

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
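
The #0 forms take a single source register, which is why the argument preparation above lists only a. In ACLE these are the vceqz_* intrinsics; where a toolchain does not expose them, vceq_s8(a, vdup_n_s8(0)) expresses the same comparison. A minimal sketch under that assumption:

// Minimal sketch: CMEQ Vd.8B,Vn.8B,#0 via vceqz_s8 (availability assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn cmeqz_demo() {
    use std::arch::aarch64::*;
    let a: int8x8_t = core::mem::transmute([0i8, 3, 0, -1, 0, 0, 9, 0]);
    let mask: [u8; 8] = core::mem::transmute(vceqz_s8(a));
    assert_eq!(mask, [0xFF, 0, 0xFF, 0, 0xFF, 0xFF, 0, 0xFF]);
}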

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
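
The 128-bit .4S form behaves the same way, lane by lane. A hedged sketch, assuming this entry maps to the vceqzq_s32 intrinsic (assumed name):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzq_s32() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int32x4_t = core::mem::transmute([0i32, -7, 0, 42]);
        // Zero lanes become 0xFFFF_FFFF, non-zero lanes become 0.
        let mask: uint32x4_t = vceqzq_s32(a);
        let lanes: [u32; 4] = core::mem::transmute(mask);
        assert_eq!(lanes, [u32::MAX, 0, u32::MAX, 0]);
    }
}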

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
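
A minimal Rust sketch of the floating-point zero comparison, assuming this entry corresponds to the vceqz_f32 intrinsic (an assumption; the intrinsic name is not shown here):

#[cfg(target_arch = "aarch64")]
fn demo_vceqz_f32() {
    use core::arch::aarch64::*;
    unsafe {
        // Lane 0 equals zero, lane 1 does not.
        let a: float32x2_t = core::mem::transmute([0.0f32, 1.5]);
        let mask: uint32x2_t = vceqz_f32(a);
        let lanes: [u32; 2] = core::mem::transmute(mask);
        assert_eq!(lanes, [u32::MAX, 0]);
    }
}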

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
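
For the scalar doubleword form, a hedged sketch assuming the vceqzd_s64 intrinsic (assumed mapping):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzd_s64() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar form: the whole 64-bit result is all-ones when the input is zero.
        assert_eq!(vceqzd_s64(0), u64::MAX);
        assert_eq!(vceqzd_s64(-1), 0);
    }
}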

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
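
The .2D form follows the same pattern per 64-bit lane. A sketch, assuming this entry maps to vceqzq_s64 (assumed name):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzq_s64() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int64x2_t = core::mem::transmute([0i64, -9]);
        let mask: uint64x2_t = vceqzq_s64(a);
        let lanes: [u64; 2] = core::mem::transmute(mask);
        assert_eq!(lanes, [u64::MAX, 0]);
    }
}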

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
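
A sketch of the scalar single-precision form, assuming it corresponds to the vceqzs_f32 intrinsic (assumed mapping):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzs_f32() {
    use core::arch::aarch64::*;
    unsafe {
        // All-ones u32 when the input compares equal to zero, otherwise 0.
        assert_eq!(vceqzs_f32(0.0), u32::MAX);
        assert_eq!(vceqzs_f32(3.25), 0);
    }
}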

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
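
The Vm-before-Vn operand order in the listed instruction is how an a <= b comparison is expressed with CMGE (the operands are swapped and the same encoding reused), so this entry most likely corresponds to the vcle_s8 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcle_s8() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int8x8_t = core::mem::transmute([-3i8, 0, 5, 1, 2, 3, 4, 5]);
        let b: int8x8_t = core::mem::transmute([0i8, 0, 4, 1, 2, 3, 4, 5]);
        // Lane-wise signed a <= b; lowered as CMGE with the operands swapped.
        let mask: uint8x8_t = vcle_s8(a, b);
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert_eq!(lanes, [0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]);
    }
}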

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
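
As with the signed variant, the Vm-first operand order suggests an unsigned a <= b comparison lowered to CMHS with swapped operands, so this likely corresponds to the vcle_u8 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcle_u8() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x8_t = core::mem::transmute([1u8, 200, 7, 7, 0, 9, 10, 255]);
        let b: uint8x8_t = core::mem::transmute([2u8, 100, 7, 6, 0, 9, 11, 255]);
        // Lane-wise unsigned a <= b; lowered as CMHS with the operands swapped.
        let mask: uint8x8_t = vcle_u8(a, b);
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert_eq!(lanes, [0xFF, 0, 0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF]);
    }
}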

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
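
The swapped Vm/Vn order again points at an a <= b comparison, so this entry likely maps to the vcle_f32 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcle_f32() {
    use core::arch::aarch64::*;
    unsafe {
        let a: float32x2_t = core::mem::transmute([1.0f32, 2.5]);
        let b: float32x2_t = core::mem::transmute([1.0f32, 2.0]);
        // Lane-wise a <= b; lowered as FCMGE with the operands swapped.
        let mask: uint32x2_t = vcle_f32(a, b);
        let lanes: [u32; 2] = core::mem::transmute(mask);
        assert_eq!(lanes, [u32::MAX, 0]);
    }
}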

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
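
With a mapped to Dn and b to Dm, this scalar register form plausibly corresponds to the vcged_s64 intrinsic (a >= b). A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcged_s64() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar signed a >= b: the whole 64-bit result is all-ones or all-zeros.
        assert_eq!(vcged_s64(5, -1), u64::MAX);
        assert_eq!(vcged_s64(-2, 7), 0);
    }
}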

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
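
With a mapped to Sn and b to Sm, this scalar single-precision form plausibly corresponds to the vcges_f32 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcges_f32() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar a >= b: all-ones u32 when true, 0 otherwise.
        assert_eq!(vcges_f32(2.0, 1.0), u32::MAX);
        assert_eq!(vcges_f32(0.5, 1.0), 0);
    }
}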

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
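From Rust this compare-against-zero form is reached through the core::arch::aarch64 intrinsics; a minimal sketch, assuming vcgez_s8 is exposed with its usual ACLE signature:

#[cfg(target_arch = "aarch64")]
fn cmgez_demo() {
    // assumption: vcgez_s8 and vdup_n_s8 are available in core::arch::aarch64
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(-5);                  // all eight lanes hold -5
        let mask = vcgez_s8(a);                 // CMGE Vd.8B,Vn.8B,#0
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == 0)); // -5 >= 0 is false, so every lane is 0x00
    }
}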

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
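A minimal Rust sketch of the floating-point compare-against-zero form, assuming vcgezq_f32 is exposed by core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
fn fcmgez_demo() {
    // assumption: vcgezq_f32 and vdupq_n_f32 are available in core::arch::aarch64
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(0.0);               // +0.0 in every lane
        let mask = vcgezq_f32(a);               // FCMGE Vd.4S,Vn.4S,#0
        let lanes: [u32; 4] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == u32::MAX)); // 0.0 >= 0.0, so all bits set
    }
}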

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
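A minimal Rust sketch of the register form of this signed compare, assuming the usual mapping of vcge_s8 to CMGE; lane values are arbitrary:

#[cfg(target_arch = "aarch64")]
fn cmge_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(3);
        let b = vdup_n_s8(-1);
        let mask = vcge_s8(a, b);               // CMGE Vd.8B,Vn.8B,Vm.8B
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == 0xFF)); // 3 >= -1 in every lane
    }
}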

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
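A minimal Rust sketch of the unsigned register compare, also showing why the all-ones/all-zeros mask is convenient (vbsl_u8 performs a bitwise select with it); lane values are arbitrary:

#[cfg(target_arch = "aarch64")]
fn cmhs_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_u8(200);
        let b = vdup_n_u8(100);
        let mask = vcge_u8(a, b);               // CMHS Vd.8B,Vn.8B,Vm.8B
        let max = vbsl_u8(mask, a, b);          // branchless per-lane max via the mask
        let lanes: [u8; 8] = core::mem::transmute(max);
        assert!(lanes.iter().all(|&l| l == 200));
    }
}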

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
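A minimal Rust sketch of the signed compare-less-than-or-equal-to-zero form, assuming vclez_s8 is exposed by core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
fn cmlez_demo() {
    // assumption: vclez_s8 and vdup_n_s8 are available in core::arch::aarch64
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(-7);
        let mask = vclez_s8(a);                 // CMLE Vd.8B,Vn.8B,#0
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == 0xFF)); // -7 <= 0, so every lane is all ones
    }
}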

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
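
A minimal Rust sketch of the floating-point variant, assuming four 32-bit lanes (the 4S arrangement) and an illustrative function name; a NaN lane fails the comparison and therefore yields an all-zeros mask, matching the FPCompareGE behaviour in the pseudocode:

// Reference model of FCMLE #0 on four f32 lanes.
fn fcmle_zero_f32x4(a: [f32; 4]) -> [u32; 4] {
    let mut result = [0u32; 4];
    for (dst, &lane) in result.iter_mut().zip(a.iter()) {
        // `<=` is false for NaN, so NaN lanes produce 0, as in the pseudocode.
        *dst = if lane <= 0.0 { u32::MAX } else { 0 };
    }
    result
}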

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
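
A minimal Rust sketch of the register-to-register signed compare, assuming eight 8-bit lanes (the 8B arrangement) and an illustrative function name:

// Reference model of CMGT (register): lane-wise signed a > b.
fn cmgt_i8x8(a: [i8; 8], b: [i8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for i in 0..8 {
        result[i] = if a[i] > b[i] { u8::MAX } else { 0 };
    }
    result
}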

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
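
A minimal Rust sketch of the unsigned compare, assuming eight 8-bit lanes and an illustrative function name; the only difference from the signed form is the unsigned lane type:

// Reference model of CMHI: lane-wise unsigned a > b.
fn cmhi_u8x8(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for i in 0..8 {
        result[i] = if a[i] > b[i] { u8::MAX } else { 0 };
    }
    result
}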

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
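
A minimal Rust sketch of the floating-point register compare, assuming two 32-bit lanes (the 2S arrangement) and an illustrative function name; a NaN in either lane fails the comparison and yields an all-zeros mask:

// Reference model of FCMGT (register): lane-wise a > b on f32.
fn fcmgt_f32x2(a: [f32; 2], b: [f32; 2]) -> [u32; 2] {
    let mut result = [0u32; 2];
    for i in 0..2 {
        result[i] = if a[i] > b[i] { u32::MAX } else { 0 };
    }
    result
}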

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
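
A minimal Rust sketch of the signed compare against zero, assuming eight 8-bit lanes and an illustrative function name:

// Reference model of CMGT #0: lane-wise signed a > 0.
fn cmgt_zero_i8x8(a: [i8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for i in 0..8 {
        result[i] = if a[i] > 0 { u8::MAX } else { 0 };
    }
    result
}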

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
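
A minimal Rust sketch of the floating-point compare against zero, assuming two 32-bit lanes and an illustrative function name; NaN lanes fail the comparison and yield an all-zeros mask:

// Reference model of FCMGT #0: lane-wise a > 0.0 on f32.
fn fcmgt_zero_f32x2(a: [f32; 2]) -> [u32; 2] {
    let mut result = [0u32; 2];
    for i in 0..2 {
        result[i] = if a[i] > 0.0 { u32::MAX } else { 0 };
    }
    result
}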

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
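
A brief Rust sketch, under the assumption that the unsigned-higher form above corresponds to the core::arch::aarch64 intrinsic vcgt_u8 (the wrapper function below is illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn cmhi_sketch(a: core::arch::aarch64::uint8x8_t,
                      b: core::arch::aarch64::uint8x8_t)
                      -> core::arch::aarch64::uint8x8_t {
    use core::arch::aarch64::*;
    // Per-lane unsigned a > b; every bit of a passing lane is set to one.
    vcgt_u8(a, b)
}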

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
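
For the greater-than-zero comparison described above, a hedged sketch: core::arch::aarch64 is assumed here to expose it as vcgtz_f32 (the two-register FCMGT has a separate vcgt_f32 wrapper).

#[cfg(target_arch = "aarch64")]
unsafe fn fcmgt_zero_sketch(v: core::arch::aarch64::float32x2_t)
                            -> core::arch::aarch64::uint32x2_t {
    use core::arch::aarch64::*;
    // Per-lane v > 0.0; a NaN lane compares false and produces an all-zero lane.
    vcgtz_f32(v)
}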

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
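
For the scalar D-register form above, a hedged sketch: current core::arch::aarch64 is assumed to expose it as vcgtd_s64, taking and returning plain 64-bit integers rather than vector types.

#[cfg(target_arch = "aarch64")]
unsafe fn cmgt_scalar_sketch(a: i64, b: i64) -> u64 {
    use core::arch::aarch64::*;
    // Signed a > b on the single 64-bit element: u64::MAX if true, 0 otherwise.
    vcgtd_s64(a, b)
}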

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
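
A short Rust counterpart for the less-than-zero form above, assuming the core::arch::aarch64 intrinsic vcltz_s8 is the relevant wrapper:

#[cfg(target_arch = "aarch64")]
unsafe fn cmlt_zero_sketch(v: core::arch::aarch64::int8x8_t)
                           -> core::arch::aarch64::uint8x8_t {
    use core::arch::aarch64::*;
    // Per-lane signed v < 0, i.e. each lane's sign bit broadcast to all 8 bits.
    vcltz_s8(v)
}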

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
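
A matching floating-point sketch, assuming vcltz_f32 in core::arch::aarch64 covers the FCMLT-against-zero form above:

#[cfg(target_arch = "aarch64")]
unsafe fn fcmlt_zero_sketch(v: core::arch::aarch64::float32x2_t)
                            -> core::arch::aarch64::uint32x2_t {
    use core::arch::aarch64::*;
    // Per-lane v < 0.0; NaN lanes compare false, and -0.0 is not less than +0.0.
    vcltz_f32(v)
}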

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
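
As a sketch of the absolute-compare form above (assuming vcage_f32 is the corresponding core::arch::aarch64 intrinsic):

#[cfg(target_arch = "aarch64")]
unsafe fn facge_sketch(a: core::arch::aarch64::float32x2_t,
                       b: core::arch::aarch64::float32x2_t)
                       -> core::arch::aarch64::uint32x2_t {
    use core::arch::aarch64::*;
    // Per-lane |a| >= |b|, i.e. the comparison is made with both sign bits
    // cleared first; a NaN in either lane makes that lane compare false.
    vcage_f32(a, b)
}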

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
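
The single-precision scalar form above has a scalar counterpart as well; vcages_f32 is assumed here, returning u32::MAX or 0 instead of a vector:

#[cfg(target_arch = "aarch64")]
unsafe fn facge_scalar_sketch(a: f32, b: f32) -> u32 {
    use core::arch::aarch64::*;
    // |a| >= |b| on the single element; all result bits set when the test passes.
    vcages_f32(a, b)
}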

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
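
A minimal scalar sketch of the per-lane FACGT step described above, in Rust, assuming 32-bit lanes; the function name is illustrative, and FPCR rounding, exception, and NaN-signalling behaviour are not modelled.

fn facgt_lane(a: f32, b: f32) -> u32 {
    // |a| > |b| -> all ones, otherwise all zeros, for one 32-bit lane
    if a.abs() > b.abs() { u32::MAX } else { 0 }
}

fn main() {
    assert_eq!(facgt_lane(-3.0, 2.0), u32::MAX); // |-3.0| > |2.0|
    assert_eq!(facgt_lane(1.0, -1.0), 0);        // equal magnitudes are not greater
}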

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
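
For CMTST the pseudocode takes the and_test branch, so each lane reduces to a bitwise AND tested for being non-zero. A minimal per-lane sketch in Rust, assuming 8-bit lanes; the function name is illustrative.

fn cmtst_lane(a: u8, b: u8) -> u8 {
    // all ones if a AND b has any bit set, otherwise all zeros
    if a & b != 0 { u8::MAX } else { 0 }
}

fn main() {
    assert_eq!(cmtst_lane(0b1010_0000, 0b0010_0000), 0xFF); // shared bit
    assert_eq!(cmtst_lane(0b1010_0000, 0b0101_0101), 0x00); // no common bits
}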

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
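
The SABD pseudocode above is shared with the accumulating form (SABA); for SABD itself accumulate is false, so each lane is just the absolute difference of the signed inputs truncated to the lane width (<esize-1:0>). A minimal per-lane sketch in Rust for 8-bit lanes; the function name is illustrative and the accumulate path is not modelled.

fn sabd_lane(a: i8, b: i8) -> u8 {
    // widen, take |a - b|, then truncate back to the 8-bit lane
    ((a as i16) - (b as i16)).unsigned_abs() as u8
}

fn main() {
    assert_eq!(sabd_lane(5, -3), 8);
    assert_eq!(sabd_lane(-128, 127), 255); // the full-range difference still fits the lane
}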

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
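
Same per-lane shape as SABD above, with the inputs treated as unsigned. A minimal sketch in Rust for 8-bit lanes; the function name is illustrative and the accumulate path is not modelled.

fn uabd_lane(a: u8, b: u8) -> u8 {
    // absolute difference of two unsigned lanes; the result always fits the lane width
    a.abs_diff(b)
}

fn main() {
    assert_eq!(uabd_lane(10, 250), 240);
    assert_eq!(uabd_lane(250, 10), 240);
}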

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
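
Per lane, the FABD pseudocode above is an FPSub followed by an FPAbs. A minimal scalar sketch in Rust, assuming 32-bit lanes; the function name is illustrative and FPCR rounding and exception behaviour are not modelled.

fn fabd_lane(a: f32, b: f32) -> f32 {
    // |a - b| for one 32-bit lane
    (a - b).abs()
}

fn main() {
    assert_eq!(fabd_lane(1.25, -2.0), 3.25);
    assert_eq!(fabd_lane(-0.5, 0.5), 1.0);
}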

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
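
A rough scalar picture of the widening behaviour, assuming signed 8-bit source lanes and 16-bit result lanes as in the Vd.8H form; the names and types are chosen only for illustration.

// Scalar model of SABDL: |a[i] - b[i]| on signed 8-bit lanes, widened to 16 bits.
fn sabdl_s8(a: &[i8], b: &[i8]) -> Vec<i16> {
    a.iter()
        .zip(b)
        .map(|(&x, &y)| (i16::from(x) - i16::from(y)).abs())
        .collect()
}

fn main() {
    assert_eq!(sabdl_s8(&[-128, 5], &[127, -5]), vec![255, 10]);
}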

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
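
The unsigned widening form can be sketched under the same assumptions (8-bit source lanes, 16-bit results); purely illustrative, not stdarch API.

// Scalar model of UABDL: |a[i] - b[i]| on unsigned 8-bit lanes, widened to 16 bits.
fn uabdl_u8(a: &[u8], b: &[u8]) -> Vec<u16> {
    a.iter()
        .zip(b)
        .map(|(&x, &y)| u16::from(x.abs_diff(y)))
        .collect()
}

fn main() {
    assert_eq!(uabdl_u8(&[0, 200], &[255, 100]), vec![255, 100]);
}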

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
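
The accumulate form updates the destination in place. Here is a scalar Rust sketch, assuming signed 8-bit lanes and using wrapping addition to mirror the modular accumulation in the pseudocode; the helper is hypothetical.

// Scalar model of SABA: acc[i] += |a[i] - b[i]| with wrapping (modular) addition.
fn saba_s8(acc: &mut [i8], a: &[i8], b: &[i8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        let absdiff = (i16::from(x) - i16::from(y)).unsigned_abs() as u8;
        *d = d.wrapping_add(absdiff as i8);
    }
}

fn main() {
    let mut acc = [1i8, 2];
    saba_s8(&mut acc, &[5, -3], &[2, 4]);
    assert_eq!(acc, [4, 9]);
}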

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
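
The unsigned counterpart, sketched on 8-bit lanes for illustration only.

// Scalar model of UABA: acc[i] = acc[i] + |a[i] - b[i]| with wrapping addition.
fn uaba_u8(acc: &mut [u8], a: &[u8], b: &[u8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        *d = d.wrapping_add(x.abs_diff(y));
    }
}

fn main() {
    let mut acc = [250u8, 1];
    uaba_u8(&mut acc, &[3, 9], &[10, 4]);
    assert_eq!(acc, [1, 6]);
}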

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
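
Combining the widening and accumulating behaviour, a scalar sketch assuming 8-bit signed source lanes and a 16-bit accumulator; not stdarch API.

// Scalar model of SABAL: 16-bit accumulator lanes gain |a[i] - b[i]| from 8-bit lanes.
fn sabal_s8(acc: &mut [i16], a: &[i8], b: &[i8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        *d = d.wrapping_add((i16::from(x) - i16::from(y)).abs());
    }
}

fn main() {
    let mut acc = [100i16, -1];
    sabal_s8(&mut acc, &[-10, 7], &[10, 2]);
    assert_eq!(acc, [120, 4]);
}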

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
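
And the unsigned widening accumulate, under the same illustrative assumptions.

// Scalar model of UABAL: 16-bit accumulator lanes gain |a[i] - b[i]| from unsigned 8-bit lanes.
fn uabal_u8(acc: &mut [u16], a: &[u8], b: &[u8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        *d = d.wrapping_add(u16::from(x.abs_diff(y)));
    }
}

fn main() {
    let mut acc = [0u16, 65535];
    uabal_u8(&mut acc, &[200, 1], &[100, 2]);
    assert_eq!(acc, [100, 0]);
}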

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
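
The max/min family reduces to a per-lane comparison; a scalar sketch on signed 8-bit lanes, with an invented helper name.

// Scalar model of SMAX: per-lane signed maximum.
fn smax_s8(a: &[i8], b: &[i8]) -> Vec<i8> {
    a.iter().zip(b).map(|(&x, &y)| x.max(y)).collect()
}

fn main() {
    assert_eq!(smax_s8(&[-1, 4], &[3, -7]), vec![3, 4]);
}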

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
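
The unsigned variant differs only in the lane interpretation; sketched on 8-bit unsigned lanes for illustration.

// Scalar model of UMAX: per-lane unsigned maximum.
fn umax_u8(a: &[u8], b: &[u8]) -> Vec<u8> {
    a.iter().zip(b).map(|(&x, &y)| x.max(y)).collect()
}

fn main() {
    assert_eq!(umax_u8(&[0, 200], &[255, 100]), vec![255, 200]);
}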

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
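
A scalar sketch of the floating-point maximum on f32 lanes; note that f32::max does not reproduce the exact NaN and FPCR behaviour of FPMax, so this is only an approximation for illustration.

// Scalar model of FMAX: per-lane floating-point maximum (NaN handling simplified).
fn fmax_f32(a: &[f32], b: &[f32]) -> Vec<f32> {
    a.iter().zip(b).map(|(&x, &y)| x.max(y)).collect()
}

fn main() {
    assert_eq!(fmax_f32(&[1.0, -2.5], &[0.5, 3.0]), vec![1.0, 3.0]);
}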

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
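
A short Rust sketch for the SMIN Vd.8B form, assuming the standard NEON intrinsic vmin_s8 in core::arch::aarch64 (transmute is only used to build and inspect the eight signed byte lanes).

#[cfg(target_arch = "aarch64")]
fn smin_8b_example() -> [i8; 8] {
    use core::arch::aarch64::{int8x8_t, vmin_s8};
    unsafe {
        // Lane-wise signed minimum (SMIN Vd.8B,Vn.8B,Vm.8B).
        let a: int8x8_t = core::mem::transmute([-3i8, 5, 0, 127, -128, 1, 2, 3]);
        let b: int8x8_t = core::mem::transmute([2i8, -7, 0, 126, -1, 1, 2, 3]);
        core::mem::transmute(vmin_s8(a, b)) // [-3, -7, 0, 126, -128, 1, 2, 3]
    }
}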

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
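
The unsigned counterpart follows the same shape; a sketch for the UMIN Vd.8B form, assuming the intrinsic vmin_u8 in core::arch::aarch64.

#[cfg(target_arch = "aarch64")]
fn umin_8b_example() -> [u8; 8] {
    use core::arch::aarch64::{uint8x8_t, vmin_u8};
    unsafe {
        // Lane-wise unsigned minimum (UMIN Vd.8B,Vn.8B,Vm.8B).
        let a: uint8x8_t = core::mem::transmute([3u8, 200, 0, 255, 10, 10, 10, 10]);
        let b: uint8x8_t = core::mem::transmute([9u8, 100, 1, 254, 10, 10, 10, 10]);
        core::mem::transmute(vmin_u8(a, b)) // [3, 100, 0, 254, 10, 10, 10, 10]
    }
}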

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
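
A hedged Rust sketch for the FMIN Vd.2S form, assuming the usual NEON intrinsic name vmin_f32 in core::arch::aarch64. Like FMAX, this uses FPMin, so a NaN operand lane yields a NaN result lane.

#[cfg(target_arch = "aarch64")]
fn fmin_2s_example() -> [f32; 2] {
    use core::arch::aarch64::{float32x2_t, vmin_f32};
    unsafe {
        // Lane-wise floating-point minimum (FMIN Vd.2S,Vn.2S,Vm.2S).
        let a: float32x2_t = core::mem::transmute([1.0f32, -0.5]);
        let b: float32x2_t = core::mem::transmute([0.25f32, 2.0]);
        core::mem::transmute(vmin_f32(a, b)) // [0.25, -0.5]
    }
}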

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64
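
A sketch of the FMAXNM Vd.2S behaviour, assuming the intrinsic name vmaxnm_f32 in core::arch::aarch64. The point of interest is the FPMaxNum rule: a quiet NaN in one operand is ignored and the numeric operand is returned, unlike plain FMAX.

#[cfg(target_arch = "aarch64")]
fn fmaxnm_2s_example() -> [f32; 2] {
    use core::arch::aarch64::{float32x2_t, vmaxnm_f32};
    unsafe {
        // maxNum semantics: NaN in lane 0 of `a` is ignored, 0.5 wins.
        let a: float32x2_t = core::mem::transmute([f32::NAN, 1.0f32]);
        let b: float32x2_t = core::mem::transmute([0.5f32, 2.0]);
        core::mem::transmute(vmaxnm_f32(a, b)) // [0.5, 2.0]
    }
}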

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64
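
The mirrored sketch for FMINNM Vd.2S, again assuming the intrinsic name vminnm_f32 in core::arch::aarch64, and again relying on the minNum rule that drops a quiet NaN in favour of the numeric operand.

#[cfg(target_arch = "aarch64")]
fn fminnm_2s_example() -> [f32; 2] {
    use core::arch::aarch64::{float32x2_t, vminnm_f32};
    unsafe {
        // minNum semantics: NaN in lane 0 of `a` is ignored, 0.5 wins.
        let a: float32x2_t = core::mem::transmute([f32::NAN, 1.0f32]);
        let b: float32x2_t = core::mem::transmute([0.5f32, 2.0]);
        core::mem::transmute(vminnm_f32(a, b)) // [0.5, 1.0]
    }
}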

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
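
A Rust sketch for the SSHL Vd.8B form, assuming the intrinsic vshl_s8 in core::arch::aarch64. As in the pseudocode above, the shift amount is the signed low byte of each lane of the second operand, so a positive lane shifts left and a negative lane shifts right.

#[cfg(target_arch = "aarch64")]
fn sshl_8b_example() -> [i8; 8] {
    use core::arch::aarch64::{int8x8_t, vshl_s8};
    unsafe {
        // Per-lane variable shift: +3 and +1 shift left, -2 and -1 shift right
        // (arithmetic shift for the signed variant).
        let a: int8x8_t = core::mem::transmute([1i8, -8, 64, 3, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([3i8, 1, -2, -1, 0, 0, 0, 0]);
        core::mem::transmute(vshl_s8(a, b)) // [8, -16, 16, 1, 0, 0, 0, 0]
    }
}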

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
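
The unsigned form USHL Vd.8B can be sketched the same way, assuming the intrinsic vshl_u8 in core::arch::aarch64 (data lanes are unsigned, shift lanes stay signed). Because this variant does not saturate, a left shift that overflows the element simply keeps the low esize bits.

#[cfg(target_arch = "aarch64")]
fn ushl_8b_example() -> [u8; 8] {
    use core::arch::aarch64::{int8x8_t, uint8x8_t, vshl_u8};
    unsafe {
        // 0x80 << 1 = 0x100 wraps to 0 in an 8-bit lane; -2 shifts 9 right to 2.
        let a: uint8x8_t = core::mem::transmute([1u8, 0x80, 9, 0, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([3i8, 1, -2, 0, 0, 0, 0, 0]);
        core::mem::transmute(vshl_u8(a, b)) // [8, 0, 2, 0, 0, 0, 0, 0]
    }
}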

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
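
A sketch of the saturating variant SQSHL Vd.8B, assuming the intrinsic vqshl_s8 in core::arch::aarch64. Here an overflowing left shift clamps to the signed range instead of wrapping, and the QC flag in FPSR is set as described in the pseudocode above.

#[cfg(target_arch = "aarch64")]
fn sqshl_8b_example() -> [i8; 8] {
    use core::arch::aarch64::{int8x8_t, vqshl_s8};
    unsafe {
        // 100 << 1 saturates to 127, -100 << 1 saturates to -128,
        // 3 << 2 = 12 fits, and the -1 lane shifts 1 right to 0.
        let a: int8x8_t = core::mem::transmute([100i8, -100, 3, 1, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([1i8, 1, 2, -1, 0, 0, 0, 0]);
        core::mem::transmute(vqshl_s8(a, b)) // [127, -128, 12, 0, 0, 0, 0, 0]
    }
}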

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
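
For the unsigned form the only differences are the element type and the saturation bound, which a short sketch for 8-bit elements makes concrete (the helper name is invented for illustration, not an actual intrinsic signature); the shift count is still signed, so negative counts shift right:

// Minimal per-lane model of UQSHL (unsigned saturating shift left by register).
fn uqshl_lane_u8(a: u8, b: u8) -> (u8, bool) {
    let shift = i32::from(b as i8);        // signed count from the low byte of `b`
    if shift >= 0 {
        // A u32 holds a u8 shifted left by up to 8 places without losing bits.
        let wide = u32::from(a) << shift.min(8);
        (wide.min(u32::from(u8::MAX)) as u8, wide > u32::from(u8::MAX))
    } else {
        // Unsigned right shift, truncating; it can never saturate.
        ((u32::from(a) >> (-shift).min(31)) as u8, false)
    }
}
// e.g. uqshl_lane_u8(0x90, 1) == (0xFF, true)   (0x120 clamps to 255, QC set)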

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
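
The scalar doubleword form follows the same pseudocode with a single 64-bit element; a model of it needs a 128-bit intermediate so the capped left shift cannot lose bits. As before, this is only an illustrative sketch with an invented name, not the stdarch implementation:

// Minimal model of the scalar SQSHL Dd,Dn,Dm form (one 64-bit element).
fn sqshl_scalar_i64(a: i64, b: i64) -> (i64, bool) {
    let shift = i32::from(b as i8);        // signed count from the low byte of `b`
    if shift >= 0 {
        // A left shift of 64 or more saturates every non-zero input, so the
        // count can be capped; an i128 holds `a << 64` exactly.
        let wide = i128::from(a) << shift.min(64);
        let sat = wide > i128::from(i64::MAX) || wide < i128::from(i64::MIN);
        (wide.clamp(i128::from(i64::MIN), i128::from(i64::MAX)) as i64, sat)
    } else {
        // Negative counts shift right, truncating, exactly as in the vector form.
        (a >> (-shift).min(63), false)
    }
}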

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
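
SRSHL rounds but does not saturate, so the interesting cases are the rounding constant on right shifts and the plain truncation to the element width on left shifts. A per-lane sketch for 16-bit elements (invented helper, illustration only):

// Minimal per-lane model of SRSHL (signed rounding shift left by register).
fn srshl_lane_i16(a: i16, b: i16) -> i16 {
    let shift = i32::from(b as i8);        // signed count from the low byte of `b`
    if shift >= 0 {
        // No saturation: only the low 16 bits of the shifted value are kept.
        if shift >= 16 { 0 } else { (i32::from(a) << shift) as i16 }
    } else {
        let s = -shift;
        if s >= 16 {
            0
        } else {
            // round_const = 1 << (s - 1) rounds the right shift to nearest,
            // with ties rounded towards positive infinity.
            ((i32::from(a) + (1 << (s - 1))) >> s) as i16
        }
    }
}
// e.g. srshl_lane_i16(5, -1)  ==  3   (2.5 rounds up)
//      srshl_lane_i16(-5, -1) == -2   (-2.5 also rounds towards +infinity)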

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
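
The unsigned rounding form behaves the same way, except that right shifts are logical and left shifts simply wrap modulo the element width. A sketch for 32-bit elements (invented helper, illustration only):

// Minimal per-lane model of URSHL (unsigned rounding shift left by register).
fn urshl_lane_u32(a: u32, b: u32) -> u32 {
    let shift = i32::from(b as u8 as i8);  // signed count from the low byte of `b`
    if shift >= 0 {
        // No saturation: bits shifted above bit 31 are discarded.
        if shift >= 32 { 0 } else { a << shift }
    } else {
        let s = -shift;
        if s > 32 {
            0
        } else {
            // Widen to u64 so the rounding constant 1 << (s - 1) always fits.
            ((u64::from(a) + (1u64 << (s - 1))) >> s) as u32
        }
    }
}
// e.g. urshl_lane_u32(7, -2) == 2   ((7 + 2) >> 2, rounded to nearest)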

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
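
SQRSHL combines both behaviours: the rounding constant is applied on right shifts and the result of a left shift saturates to the signed element range. A per-lane sketch for 8-bit elements (invented helper, illustration only), again returning a stand-in for FPSR.QC:

// Minimal per-lane model of SQRSHL (signed saturating rounding shift left).
fn sqrshl_lane_i8(a: i8, b: i8) -> (i8, bool) {
    let shift = i32::from(b);              // for byte elements, `b` itself is the count
    let wide = if shift >= 0 {
        // A left shift of 8 or more saturates any non-zero input, and an i32
        // holds an i8 shifted left by 8 exactly, so the count can be capped.
        i32::from(a) << shift.min(8)
    } else {
        // Rounding right shift; capping the count at 9 is safe because larger
        // counts also round to zero.
        let s = (-shift).min(9);
        (i32::from(a) + (1 << (s - 1))) >> s
    };
    let sat = wide > i32::from(i8::MAX) || wide < i32::from(i8::MIN);
    (wide.clamp(i32::from(i8::MIN), i32::from(i8::MAX)) as i8, sat)
}
// e.g. sqrshl_lane_i8(100, 1) == (i8::MAX, true)   (200 saturates to 127)
//      sqrshl_lane_i8(3, -1)  == (2, false)        (1.5 rounds up to 2)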

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
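
The unsigned counterpart differs only in the saturation bound (the top of the unsigned element range) and the logical right shift. A per-lane sketch for 16-bit elements (invented helper, illustration only):

// Minimal per-lane model of UQRSHL (unsigned saturating rounding shift left).
fn uqrshl_lane_u16(a: u16, b: u16) -> (u16, bool) {
    let shift = i32::from(b as u8 as i8);  // signed count from the low byte of `b`
    let wide = if shift >= 0 {
        // A left shift of 16 or more saturates any non-zero input, and a u32
        // holds a u16 shifted left by 16 exactly, so the count can be capped.
        u32::from(a) << shift.min(16)
    } else {
        // Rounding logical right shift; capping at 17 is safe because larger
        // counts also round to zero.
        let s = (-shift).min(17);
        (u32::from(a) + (1u32 << (s - 1))) >> s
    };
    (wide.min(u32::from(u16::MAX)) as u16, wide > u32::from(u16::MAX))
}
// e.g. uqrshl_lane_u16(0x8000, 1) == (u16::MAX, true)   (0x10000 clamps, QC set)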

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
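
The signed form follows the same pseudocode with signed saturation. Below is a minimal plain-Rust sketch of the signed 8-bit case (the scalar B-register form shown above operates on exactly one such value); the name sqrshl_s8 is hypothetical, and the sketch only mirrors the pseudocode under the assumption that shift amounts past the element width behave like very large shifts.

fn sqrshl_s8(value: i8, shift_byte: i8) -> (i8, bool) {
    let shift = i32::from(shift_byte);
    let widened = i64::from(value);
    let element: i64 = if shift >= 0 {
        // Left shift; the cap only avoids shift overflow in the sketch.
        widened << shift.min(16)
    } else {
        // Rounding right shift by n with rounding constant 2^(n-1).
        let n = (-shift).min(16);
        (widened + (1i64 << (n - 1))) >> n
    };
    // SatQ for the signed 8-bit range; sat models FPSR.QC.
    let sat = element > i64::from(i8::MAX) || element < i64::from(i8::MIN);
    (element.clamp(i64::from(i8::MIN), i64::from(i8::MAX)) as i8, sat)
}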

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
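
Because SSHR neither rounds nor accumulates, the generic pseudocode above reduces to a plain arithmetic right shift per lane. A minimal one-lane sketch in Rust for the signed 8-bit case, with a hypothetical name:

fn sshr_lane_s8(value: i8, n: u32) -> i8 {
    debug_assert!((1..=8u32).contains(&n));
    // No rounding constant is added (compare SRSHR); widening to i16 keeps a
    // shift by the full element width (n = 8) well defined in Rust.
    (i16::from(value) >> n) as i8
}

For example, sshr_lane_s8(-7, 1) is -4: the arithmetic shift rounds toward negative infinity because no rounding constant is added, whereas the rounding SRSHR gives -3 for the same inputs.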

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
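
The unsigned form is the same reduction with a logical shift. A minimal one-lane Rust sketch of the 8-bit case, with a hypothetical name:

fn ushr_lane_u8(value: u8, n: u32) -> u8 {
    debug_assert!((1..=8u32).contains(&n));
    // Logical right shift with no rounding constant (compare URSHR); widening
    // to u16 keeps a shift by the full element width (n = 8) defined in Rust.
    (u16::from(value) >> n) as u8
}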

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
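
The operation above is a per-lane logical shift left in which bits shifted out of the element are discarded. A one-lane sketch in Rust for the 8-bit case, with a hypothetical name:

fn shl_lane_u8(value: u8, n: u32) -> u8 {
    debug_assert!(n <= 7); // immediate range for byte elements
    // LSL: bits shifted past the top of the element are simply dropped.
    value << n
}

For example, shl_lane_u8(0b1100_0001, 2) is 0b0000_0100: the two high bits fall off the top of the element.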

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 ≤ n ≤ 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 ≤ n ≤ 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 ≤ n ≤ 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 ≤ n ≤ 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
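
SRSHR is the rounding counterpart of SSHR: the pseudocode adds 2^(n-1) before the arithmetic shift. A one-lane Rust sketch of the signed 8-bit case, with a hypothetical name:

fn srshr_lane_s8(value: i8, n: u32) -> i8 {
    debug_assert!((1..=8u32).contains(&n));
    // Add the rounding constant 2^(n-1), then shift arithmetically; for this
    // element width the result always fits back into 8 bits.
    ((i32::from(value) + (1i32 << (n - 1))) >> n) as i8
}

For example, srshr_lane_s8(-7, 1) is -3, whereas the truncating SSHR gives -4 for the same inputs.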

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
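
URSHR rounds the same way for unsigned lanes. A one-lane Rust sketch of the 8-bit case, with a hypothetical name:

fn urshr_lane_u8(value: u8, n: u32) -> u8 {
    debug_assert!((1..=8u32).contains(&n));
    // Add 2^(n-1) before the logical shift; widening avoids any overflow of
    // the intermediate sum.
    ((u32::from(value) + (1u32 << (n - 1))) >> n) as u8
}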

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
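
A minimal scalar Rust sketch of the signed rounding shift right described above, for the 64-bit doubleword form (names are illustrative, not the intrinsic): the value is widened so that adding the rounding constant cannot overflow, and the arithmetic shift preserves the sign.

/// Scalar model of SRSHR on a signed 64-bit value: rounding arithmetic
/// shift right by n (1 <= n <= 64). Sketch only.
fn srshr_i64(x: i64, n: u32) -> i64 {
    assert!(n >= 1 && n <= 64);
    let round_const: i128 = 1i128 << (n - 1);
    (((x as i128) + round_const) >> n) as i64
}

For example, srshr_i64(-5, 2) yields -1 (-5/4 = -1.25 rounded to the nearest integer), while the truncating shift used by SSHR yields -2.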

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
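
A minimal scalar Rust sketch of one signed 8-bit SSRA lane as described above (helper name and lane width are illustrative): the source lane is arithmetically shifted right with truncation, then added to the destination lane with wrap-around, matching the element<esize-1:0> addition in the pseudocode.

/// Scalar model of SSRA on one signed 8-bit lane: truncating arithmetic
/// shift right of the source by n (1 <= n <= 8), then a wrapping add
/// into the accumulator lane. Sketch only.
fn ssra_i8(acc: i8, x: i8, n: u32) -> i8 {
    assert!(n >= 1 && n <= 8);
    let shifted = ((x as i32) >> n) as i8; // fits: |x >> n| <= 64 for n >= 1
    acc.wrapping_add(shifted)
}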

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
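
The unsigned form follows the same pattern; here is a minimal scalar Rust sketch of one unsigned 8-bit USRA lane (illustrative only): a truncating logical right shift followed by a wrapping add into the accumulator.

/// Scalar model of USRA on one unsigned 8-bit lane: truncating logical
/// shift right by n (1 <= n <= 8), then a wrapping add into the
/// accumulator lane. Sketch only.
fn usra_u8(acc: u8, x: u8, n: u32) -> u8 {
    assert!(n >= 1 && n <= 8);
    acc.wrapping_add(((x as u32) >> n) as u8)
}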

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
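
A minimal scalar Rust sketch of one signed 8-bit SRSRA lane as described above (illustrative only): SRSRA combines the rounding right shift of SRSHR with the wrapping accumulate of SSRA.

/// Scalar model of SRSRA on one signed 8-bit lane: add the rounding
/// constant 2^(n-1), arithmetically shift right by n (1 <= n <= 8),
/// then wrapping-add into the accumulator lane. Sketch only.
fn srsra_i8(acc: i8, x: i8, n: u32) -> i8 {
    assert!(n >= 1 && n <= 8);
    let round_const: i32 = 1 << (n - 1);
    let shifted = (((x as i32) + round_const) >> n) as i8; // fits: |result| <= 64
    acc.wrapping_add(shifted)
}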

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
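
A minimal scalar Rust sketch of one unsigned 8-bit URSRA lane (illustrative only): the rounding logical right shift of URSHR followed by the wrapping accumulate of USRA.

/// Scalar model of URSRA on one unsigned 8-bit lane: add the rounding
/// constant 2^(n-1), logically shift right by n (1 <= n <= 8), then
/// wrapping-add into the accumulator lane. Sketch only.
fn ursra_u8(acc: u8, x: u8, n: u32) -> u8 {
    assert!(n >= 1 && n <= 8);
    let round_const: u32 = 1 << (n - 1);
    let shifted = (((x as u32) + round_const) >> n) as u8; // fits: result <= 191
    acc.wrapping_add(shifted)
}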

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
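
As a rough illustration, here is a scalar Rust sketch of a signed saturating left shift on one 8-bit lane, modelling the immediate form shown in the A64 Instruction line above (shift amount 0 ≤ n ≤ 7; names are illustrative, not the intrinsic): the result is computed exactly in a wider type and clamped to the i8 range, which is the point at which the hardware would also set the FPSR.QC sticky saturation flag.

/// Scalar model of a signed saturating left shift of one 8-bit lane by an
/// immediate n (0 <= n <= 7). Sketch only; does not model FPSR.QC.
fn sqshl_i8(x: i8, n: u32) -> i8 {
    assert!(n <= 7);
    let wide = (x as i32) << n;                      // exact, no overflow in i32
    wide.clamp(i8::MIN as i32, i8::MAX as i32) as i8 // saturate on overflow
}

For example, sqshl_i8(100, 1) saturates to 127 instead of wrapping to -56.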

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 ≤ n ≤ 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 ≤ n ≤ 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
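
The unsigned counterpart, again sketched in scalar Rust for the immediate form from the A64 Instruction line above (illustrative only): the shifted value is clamped to u8::MAX on overflow rather than wrapping.

/// Scalar model of an unsigned saturating left shift of one 8-bit lane by
/// an immediate n (0 <= n <= 7). Sketch only; does not model FPSR.QC.
fn uqshl_u8(x: u8, n: u32) -> u8 {
    assert!(n <= 7);
    let wide = (x as u32) << n;
    if wide > u8::MAX as u32 { u8::MAX } else { wide as u8 }
}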

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Bd,Bn,#n
+

Argument Preparation

a → Bn 
+0 <= n <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
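
A per-element sketch in Rust of the signed saturating left shift by an immediate, matching the Bd/Bn (8-bit) form (illustrative only, not the core_arch implementation):

// Widen, shift, then clamp into the signed 8-bit range.
fn sqshl_imm_i8(x: i8, n: u32) -> i8 {
    assert!(n <= 7);
    let wide = (x as i32) << n;      // -128 << 7 = -16384 still fits comfortably in i32
    if wide > i8::MAX as i32 {
        i8::MAX                      // positive overflow saturates to 127 (sets FPSR.QC)
    } else if wide < i8::MIN as i32 {
        i8::MIN                      // negative overflow saturates to -128 (sets FPSR.QC)
    } else {
        wide as i8
    }
}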

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Hd,Hn,#n
+

Argument Preparation

a → Hn 
+0 <= n <= 15

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Sd,Sn,#n
+

Argument Preparation

a → Sn 
+0 <= n <= 31

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Bd,Bn,#n
+

Argument Preparation

a → Bn 
+0 <= n <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
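
The Operation pseudocode repeated in these UQSHL/SQSHL entries is the general register form: the shift amount is read as a signed byte, a negative value means a right shift, and rounding adds 2^(n-1) before a right shift (the immediate encodings simply fix that byte). A rough Rust model of one unsigned lane, restricted to shift magnitudes below the element size to keep the sketch simple (the names, types, and plain booleans for the decode-time flags are illustrative assumptions):

fn qshl_lane_u8(value: u8, shift: i8, rounding: bool, saturating: bool) -> u8 {
    assert!(shift > -8 && shift < 8);     // sketch only covers |shift| < element size
    let round_const: i32 = if rounding && shift < 0 {
        1 << (-shift - 1)                 // 2^(n-1) for a right shift by n
    } else {
        0                                 // 0 for a left shift
    };
    let x = value as i32 + round_const;
    let shifted = if shift >= 0 { x << shift } else { x >> -shift };
    if saturating && shifted > u8::MAX as i32 {
        u8::MAX                           // saturated; FPSR.QC would be set
    } else {
        shifted as u8                     // otherwise keep element<7:0>
    }
}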

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Hd,Hn,#n
+

Argument Preparation

a → Hn 
+0 <= n <= 15

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Sd,Sn,#n
+

Argument Preparation

a → Sn 
+0 <= n <= 31

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
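
A per-lane sketch in Rust of SQSHLU's combination of a signed input with unsigned saturation, using i8 -> u8 to match the 8B form (illustrative only):

fn sqshlu_imm_i8(x: i8, n: u32) -> u8 {
    assert!(n <= 7);
    let wide = (x as i32) << n;   // widen first so the shift itself cannot overflow
    if wide < 0 {
        0                         // negative inputs saturate to zero (sets FPSR.QC)
    } else if wide > u8::MAX as i32 {
        u8::MAX                   // too large for 8 unsigned bits (sets FPSR.QC)
    } else {
        wide as u8
    }
}

For example, sqshlu_imm_i8(20, 3) returns 160, while sqshlu_imm_i8(-1, 3) returns 0 and would set QC.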

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 <= n <= 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 <= n <= 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 <= n <= 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Bd,Bn,#n
+

Argument Preparation

a → Bn 
+0 <= n <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Hd,Hn,#n
+

Argument Preparation

a → Hn 
+0 <= n <= 15

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Sd,Sn,#n
+

Argument Preparation

a → Sn 
+0 <= n <= 31

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
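
A per-lane sketch in Rust of the truncating narrowing right shift, with a u16 source lane and a u8 result to match the 8H -> 8B form (illustrative only):

fn shrn_lane_u16(x: u16, n: u32) -> u8 {
    assert!((1..=8).contains(&n));
    (x >> n) as u8   // the cast keeps element<7:0>, discarding any higher bits
}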

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64
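
The "2" (high) variants differ only in where the result goes: the narrowed lanes are written to the upper half of the destination while the lower half keeps its previous contents, which is what Vpart[d, part] expresses. A rough Rust model of that behaviour on plain arrays (illustrative only, not the real register file):

fn shrn2_u16(low_half: [u8; 8], src: [u16; 8], n: u32) -> [u8; 16] {
    assert!((1..=8).contains(&n));
    let mut out = [0u8; 16];
    out[..8].copy_from_slice(&low_half);   // existing low 64 bits are preserved
    for (i, &x) in src.iter().enumerate() {
        out[8 + i] = (x >> n) as u8;       // narrowed results fill the upper half
    }
    out
}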

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
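
A per-lane sketch in Rust of SQSHRUN: a signed source lane is right-shifted, then saturated into the unsigned range of half the width (i16 -> u8 to match the 8H -> 8B form; illustrative only):

fn sqshrun_lane_i16(x: i16, n: u32) -> u8 {
    assert!((1..=8).contains(&n));
    let shifted = (x as i32) >> n;   // arithmetic shift keeps the sign
    if shifted < 0 {
        0                            // negative results clamp to 0 (sets FPSR.QC)
    } else if shifted > u8::MAX as i32 {
        u8::MAX                      // too large for 8 unsigned bits (sets FPSR.QC)
    } else {
        shifted as u8
    }
}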

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 <= n <= 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 <= n <= 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 <= n <= 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
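
The rounded variant differs from SQSHRUN only in adding 2^(n-1) before the shift, so the bits shifted out round the result to nearest instead of truncating it. A per-lane sketch in Rust (illustrative only):

fn sqrshrun_lane_i16(x: i16, n: u32) -> u8 {
    assert!((1..=8).contains(&n));
    let round_const = 1i32 << (n - 1);         // half of the weight being shifted out
    let shifted = (x as i32 + round_const) >> n;
    shifted.max(0).min(u8::MAX as i32) as u8   // clamp into 0..=255
}

For example, with n = 4 an input of 56 yields (56 + 8) >> 4 = 4, where the truncating SQSHRUN would give 3.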

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 <= n <= 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 <= n <= 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 <= n <= 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
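
A per-lane sketch in Rust of SQSHRN, where both the source and the saturated, narrowed result are signed (i16 -> i8 to match the 8H -> 8B form; illustrative only):

fn sqshrn_lane_i16(x: i16, n: u32) -> i8 {
    assert!((1..=8).contains(&n));
    let shifted = (x as i32) >> n;
    if shifted > i8::MAX as i32 {
        i8::MAX                      // saturate at +127 (sets FPSR.QC)
    } else if shifted < i8::MIN as i32 {
        i8::MIN                      // saturate at -128 (sets FPSR.QC)
    } else {
        shifted as i8
    }
}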

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
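
The unsigned variant differs only in the shift and the saturation bounds; a matching scalar sketch (name invented, not the stdarch implementation):

fn uqshrn_lane(x: u16, n: u32) -> u8 {
    // logical shift, then saturate to the unsigned 8-bit range
    (u32::from(x) >> n).min(u32::from(u8::MAX)) as u8
}

For example, uqshrn_lane(0x0FFF, 2) shifts to 1023 and saturates to 255.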

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64
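
The SQSHRN2/UQSHRN2 forms only differ in where the narrowed lanes land: the lower half of Vd is preserved and the new lanes fill the upper half, as Vpart[d, part] in the pseudocode indicates. A Rust sketch of that packing for the 16B destination form (function shape and lane widths chosen for the example):

fn sqshrn2(lower: [i8; 8], src: [i16; 8], n: u32) -> [i8; 16] {
    let mut out = [0i8; 16];
    // the lower eight lanes of the destination are kept as-is
    out[..8].copy_from_slice(&lower);
    // the narrowed, saturated lanes are written to the upper half
    for (e, &x) in src.iter().enumerate() {
        out[8 + e] = (i32::from(x) >> n).clamp(i32::from(i8::MIN), i32::from(i8::MAX)) as i8;
    }
    out
}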

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
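
Unlike the saturating forms above, RSHRN simply keeps the low half of the rounded, shifted value, so out-of-range results wrap. A scalar Rust sketch for a 16-bit lane (helper name invented):

fn rshrn_lane(x: u16, n: u32) -> u8 {
    // rounded shift, then keep only the low 8 bits; no saturation, no QC flag
    (((u32::from(x) + (1 << (n - 1))) >> n) & 0xFF) as u8
}

For example, rshrn_lane(511, 1) rounds up to 256 and wraps to 0, where UQRSHRN would have saturated to 255.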

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
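
A scalar Rust sketch of the signed rounded-and-saturated per-lane operation for a 16-bit lane (name invented for the example); it is the rounding counterpart of the truncating sqshrn_lane sketch earlier.

fn sqrshrn_lane(x: i16, n: u32) -> i8 {
    // add the rounding constant before the arithmetic shift, then saturate
    let rounded = (i32::from(x) + (1 << (n - 1))) >> n;
    rounded.clamp(i32::from(i8::MIN), i32::from(i8::MAX)) as i8
}

For example, sqrshrn_lane(127, 1) yields 64 rather than the 63 produced by the truncating form.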

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
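
The unsigned rounded counterpart can be sketched the same way (name invented, lane width chosen for the example):

fn uqrshrn_lane(x: u16, n: u32) -> u8 {
    // rounded logical shift, then saturate to the unsigned 8-bit range
    ((u32::from(x) + (1 << (n - 1))) >> n).min(u32::from(u8::MAX)) as u8
}

For example, uqrshrn_lane(511, 1) saturates to 255, whereas the non-saturating rshrn_lane sketch above wraps the same input to 0.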

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 << n << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
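
Because the destination lanes are twice as wide as the source lanes and the shift amount is at most one less than the source element size, the widened shift can never overflow. A scalar Rust sketch for an 8-bit lane (name invented for the example):

fn sshll_lane(x: i8, n: u32) -> i16 {
    // sign-extend first, then shift; with n in 0..=7 the result always fits in 16 bits
    i16::from(x) << n
}

For example, sshll_lane(-3, 4) yields -48.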

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 << n << 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 << n << 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 << n << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
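
The unsigned form only differs in the widening conversion; a matching sketch (name invented):

fn ushll_lane(x: u8, n: u32) -> u16 {
    // zero-extend, then shift; with n in 0..=7 the result fits in 16 bits
    u16::from(x) << n
}

For example, ushll_lane(0xFF, 7) yields 32640, still within the 16-bit range.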

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 << n << 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 << n << 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 << n << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
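
The SSHLL2/USHLL2 forms read the upper half of the source register, as Vpart[n, part] in the pseudocode indicates. A Rust sketch of that selection for the 16B source form (function shape chosen for the example):

fn sshll2(src: [i8; 16], n: u32) -> [i16; 8] {
    let mut out = [0i16; 8];
    // only the upper eight source lanes are widened and shifted
    for e in 0..8 {
        out[e] = i16::from(src[8 + e]) << n;
    }
    out
}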

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 << n << 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 <= n <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
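
The unsigned upper-half form (USHLL2 Vd.8H,Vn.16B,#n) differs from the signed sketch above only in using zero extension. Again a hedged scalar sketch with illustrative names:

fn ushll2_u8x16(a: [u8; 16], n: u32) -> [u16; 8] {
    assert!(n <= 7, "immediate must satisfy 0 <= n <= 7");
    let mut r = [0u16; 8];
    for e in 0..8 {
        r[e] = (a[8 + e] as u16) << n; // upper half, zero-extended, then shifted
    }
    r
}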

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 <= n <= 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
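
SHLL fixes the shift amount at the element size, so the 8-bit arrangement above always shifts by 8. A minimal scalar sketch under that assumption (name and layout are illustrative, not stdarch code):

fn shll_u8x8(a: [u8; 8]) -> [u16; 8] {
    let mut r = [0u16; 8];
    for e in 0..8 {
        r[e] = (a[e] as u16) << 8; // shift by esize == 8
    }
    r
}

An input lane of 0xFF therefore becomes 0xFF00 in the widened result.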

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
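
The SHLL2 form reads the upper half instead. A sketch assuming the 16-lane, 8-bit arrangement above (names illustrative only):

fn shll2_u8x16(a: [u8; 16]) -> [u16; 8] {
    let mut r = [0u16; 8];
    for e in 0..8 {
        r[e] = (a[8 + e] as u16) << 8; // upper half, shifted by esize == 8
    }
    r
}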

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
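
The insert behaviour described above (destination bits above the shifted value are preserved) can be sketched per 8-bit lane as follows. This mirrors the mask/NOT/OR steps of the pseudocode; the function name and lane layout are illustrative assumptions, not stdarch code.

fn sri_u8x8(d: [u8; 8], a: [u8; 8], n: u32) -> [u8; 8] {
    assert!((1..=8).contains(&n), "immediate must satisfy 1 <= n <= 8");
    let mask = (0xffu16 >> n) as u8;              // bits the shifted value may occupy
    let mut r = [0u8; 8];
    for e in 0..8 {
        let shifted = ((a[e] as u16) >> n) as u8; // logical shift right by n
        r[e] = (d[e] & !mask) | shifted;          // keep d's top n bits, insert the rest
    }
    r
}

For n == 8 the mask is zero, so every source bit is shifted out and each destination lane is left unchanged, which matches the pseudocode.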

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
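
The 64-bit scalar form (SRI Dd,Dn,#n) behaves the same way on a single doubleword. A hedged sketch with an illustrative name; n == 64 leaves the destination untouched:

fn sri_u64(d: u64, a: u64, n: u32) -> u64 {
    assert!((1..=64).contains(&n), "immediate must satisfy 1 <= n <= 64");
    if n == 64 {
        return d;                  // everything shifts out; destination preserved
    }
    let mask = u64::MAX >> n;      // bits the shifted value may occupy
    (d & !mask) | (a >> n)
}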

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 <= n <= 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 <= n <= 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 <= n <= 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
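
SLI is the left-shifting counterpart: the destination's low n bits survive and the shifted source is inserted above them. A per-lane sketch of the description above, with illustrative names rather than stdarch code:

fn sli_u8x8(d: [u8; 8], a: [u8; 8], n: u32) -> [u8; 8] {
    assert!(n <= 7, "immediate must satisfy 0 <= n <= 7");
    let mask = 0xffu8 << n;                  // bits the shifted value may occupy
    let mut r = [0u8; 8];
    for e in 0..8 {
        r[e] = (d[e] & !mask) | (a[e] << n); // keep d's low n bits, insert the rest
    }
    r
}

Bits shifted out of the top of a lane are simply discarded, matching the note in the description.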

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+0 <= n <= 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+0 <= n <= 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+0 <= n <= 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
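
And the 64-bit scalar SLI form (SLI Dd,Dn,#n), again as a hedged sketch rather than the stdarch implementation:

fn sli_u64(d: u64, a: u64, n: u32) -> u64 {
    assert!(n <= 63, "immediate must satisfy 0 <= n <= 63");
    let mask = u64::MAX << n;      // bits the shifted value may occupy
    (d & !mask) | (a << n)
}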

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+0 <= n <= 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+0 <= n <= 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+0 <= n <= 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
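
As a rough scalar analogy (not the intrinsic itself): Rust's float-to-integer `as` casts also truncate toward zero, saturate at the target type's bounds, and map NaN to 0, which matches the per-element behaviour described for FCVTZS/FCVTZU with zero fractional bits:

fn main() {
    assert_eq!(2.9_f32 as i32, 2);      // truncate toward zero (FCVTZS)
    assert_eq!((-2.9_f32) as i32, -2);
    assert_eq!((-1.0_f32) as u32, 0);   // unsigned form saturates at 0 (FCVTZU)
    assert_eq!(f32::NAN as i32, 0);     // NaN converts to 0
}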

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
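
A scalar sketch of the ties-to-even rounding step, assuming a toolchain where `f32::round_ties_even` is available (stable since Rust 1.77); this is only an analogy for the per-element conversion, not the intrinsic:

fn main() {
    // Halfway cases go to the even neighbour before the (now exact) cast.
    assert_eq!(2.5_f32.round_ties_even() as i32, 2);
    assert_eq!(3.5_f32.round_ties_even() as i32, 4);
    assert_eq!((-2.5_f32).round_ties_even() as i32, -2);
}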

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
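
As a scalar analogy for the Round towards Minus Infinity mode, `f32::floor` followed by a cast gives the same per-element result:

fn main() {
    // Round towards minus infinity, then cast (exact once the value is integral).
    assert_eq!(2.7_f32.floor() as i32, 2);
    assert_eq!((-2.3_f32).floor() as i32, -3);
}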

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
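
As a scalar analogy for the Round towards Plus Infinity mode, `f32::ceil` followed by a cast gives the same per-element result:

fn main() {
    // Round towards plus infinity, then cast.
    assert_eq!(2.3_f32.ceil() as i32, 3);
    assert_eq!((-2.3_f32).ceil() as i32, -2);
}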

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
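
As a scalar analogy for the ties-to-Away mode, `f32::round` (which rounds halfway cases away from zero) followed by a cast matches the per-element result:

fn main() {
    assert_eq!(2.5_f32.round() as i32, 3);
    assert_eq!((-2.5_f32).round() as i32, -3);
}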

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
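
The immediate #n selects a fixed-point result with n fractional bits. A scalar sketch under that assumption (the helper name is hypothetical): scale by 2^n, then let Rust's `as` cast truncate toward zero and saturate, as FPToFixed does in the Round-towards-Zero mode:

fn fcvtzs_n(x: f32, n: u32) -> i32 {
    // Illustrative only: scale into a Q-format value, then truncate toward
    // zero with saturation via the `as` cast.
    assert!((1..=32).contains(&n));
    (x * (1u64 << n) as f32) as i32
}

fn main() {
    assert_eq!(fcvtzs_n(1.5, 8), 384);    // 1.5 * 2^8
    assert_eq!(fcvtzs_n(-0.25, 4), -4);   // -0.25 * 2^4
}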

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
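
If the corresponding NEON bindings are available in core::arch::aarch64 (an assumption about the intrinsics crate, not something this entry guarantees), the 2S form can be exercised as a quick sanity check:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_scvtf() {
    use core::arch::aarch64::*;
    // SCVTF Vd.2S,Vn.2S: each signed 32-bit lane becomes an f32, rounded
    // according to the FPCR (round-to-nearest-even by default).
    let ints: int32x2_t = vdup_n_s32(-7);
    let floats: float32x2_t = vcvt_f32_s32(ints);
    assert_eq!(vget_lane_f32::<0>(floats), -7.0); // -7 is exactly representable
}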

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
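
As with the float-to-fixed direction, the #n forms here treat n as a number of fractional bits: the integer lane is converted and then divided by 2^n. A minimal scalar sketch (illustrative only; the pseudocode above shows the generic form with fracbits 0):

// Scalar model of SCVTF Sd,Sn,#n for a single lane (illustrative only).
fn scvtf_fixed_scalar(x: i32, n: u32) -> f32 {
    (x as f32) / (n as f32).exp2() // e.g. scvtf_fixed_scalar(3, 1) == 1.5
}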

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

A64
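
Assuming the vcvt_f32_f64 binding is exposed by core::arch::aarch64 (an assumption, not part of this entry), the f64-to-f32 narrowing looks like this:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_fcvtn() {
    use core::arch::aarch64::*;
    // FCVTN Vd.2S,Vn.2D: each f64 lane is narrowed to f32 using the FPCR
    // rounding mode and written to the lower half of the destination.
    let wide: float64x2_t = vdupq_n_f64(1.5);
    let narrow: float32x2_t = vcvt_f32_f64(wide);
    assert_eq!(vget_lane_f32::<0>(narrow), 1.5); // 1.5 narrows exactly
}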

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL Vd.4S,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL2 Vd.4S,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL Vd.2D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

A64
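
A hedged sketch of the widening direction, assuming the vcvt_f64_f32 binding exists in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_fcvtl() {
    use core::arch::aarch64::*;
    // FCVTL Vd.2D,Vn.2S: each f32 lane is widened to f64. Widening is always
    // exact, so the FPCR rounding mode cannot affect the result.
    let narrow: float32x2_t = vdup_n_f32(0.5);
    let wide: float64x2_t = vcvt_f64_f32(narrow);
    assert_eq!(vgetq_lane_f64::<0>(wide), 0.5);
}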

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL2 Vd.2D,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FCVTXN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR, FPRounding_ODD);
+
+Vpart[d, part] = result;
+

Supported architectures

A64
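
Round-to-odd matters when a value will be narrowed twice (for example f64 to f32 to f16): the forced-odd significand bit records that the first step was inexact and prevents the second rounding from going wrong. A hedged sketch, assuming a vcvtx_f32_f64 binding is available in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_fcvtxn() {
    use core::arch::aarch64::*;
    // FCVTXN Vd.2S,Vn.2D: 1.0 + 2^-52 is not representable as f32, so the
    // round-to-odd result is the neighbouring f32 with an odd significand.
    let wide: float64x2_t = vdupq_n_f64(1.0 + f64::EPSILON);
    let narrow: float32x2_t = vcvtx_f32_f64(wide);
    assert_eq!(vget_lane_f32::<0>(narrow).to_bits() & 1, 1); // low bit forced odd
}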

Description

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FCVTXN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR, FPRounding_ODD);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FCVTXN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR, FPRounding_ODD);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64
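
The FRINT* entries that follow differ only in the rounding mode handed to FPRoundInt. As a rough per-lane model in scalar Rust (an illustrative sketch; round_ties_even assumes a recent toolchain):

// Per-lane scalar models of the FRINT* rounding modes (illustrative only).
fn frintz(x: f32) -> f32 { x.trunc() }           // toward zero
fn frintm(x: f32) -> f32 { x.floor() }           // toward minus infinity
fn frintp(x: f32) -> f32 { x.ceil() }            // toward plus infinity
fn frinta(x: f32) -> f32 { x.round() }           // nearest, ties away from zero
fn frintn(x: f32) -> f32 { x.round_ties_even() } // nearest, ties to even
// FRINTI and FRINTX instead follow the rounding mode currently set in the
// FPCR; FRINTX additionally raises Inexact when the result differs from the
// input.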

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
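
Assuming the vmovn_s16 binding is available in core::arch::aarch64, the truncating narrow behaves exactly like a scalar as-cast to the smaller integer type:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_xtn() {
    use core::arch::aarch64::*;
    // XTN Vd.8B,Vn.8H: each 16-bit lane keeps only its low 8 bits,
    // just as 0x1FF_i16 as i8 == -1 in scalar Rust.
    let wide: int16x8_t = vdupq_n_s16(0x1FF);
    let narrow: int8x8_t = vmovn_s16(wide);
    assert_eq!(vget_lane_s8::<0>(narrow), -1);
}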

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64
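
The 2-suffixed form writes into the upper half of the destination while preserving its lower half (the r argument above). A hedged sketch, assuming the vmovn_high_s16 binding exists:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_xtn2() {
    use core::arch::aarch64::*;
    // XTN2 Vd.16B,Vn.8H: the lower 8 bytes are kept from low, the upper
    // 8 bytes are the newly narrowed lanes of wide.
    let low: int8x8_t = vdup_n_s8(7);
    let wide: int16x8_t = vdupq_n_s16(-2);
    let combined: int8x16_t = vmovn_high_s16(low, wide);
    assert_eq!(vgetq_lane_s8::<0>(combined), 7);   // preserved lower half
    assert_eq!(vgetq_lane_s8::<15>(combined), -2); // narrowed upper half
}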

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64
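
To make the narrowing behaviour concrete, here is a small Rust sketch of the XTN2 operation above. It is not part of the original reference; the function names and the fixed 32-bit to 16-bit lane width are chosen only for illustration.

    // Per-lane body of XTN/XTN2: keep the low half of each element.
    fn xtn_lane(x: u32) -> u16 {
        x as u16
    }

    // XTN2-style write-back: the narrowed lanes go into the upper half of
    // the destination while the existing lower half is preserved, which is
    // what the Vpart[d, part] assignment in the pseudocode expresses.
    fn xtn2(lower: [u16; 4], src: [u32; 4]) -> [u16; 8] {
        let mut d = [0u16; 8];
        d[..4].copy_from_slice(&lower);
        for (e, &x) in src.iter().enumerate() {
            d[4 + e] = xtn_lane(x);
        }
        d
    }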

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.8H,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.4S,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.2D,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
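
With a shift amount of #0, SSHLL reduces to sign-extending every lane to twice its width. The following Rust model is illustrative only; the function name and the 8-bit to 16-bit widths are ours, not from the reference.

    // SSHLL Vd.8H,Vn.8B,#0: sign-extend each lane; the immediate shift is
    // zero, so no further shift is applied.
    fn sshll_0(src: [i8; 8]) -> [i16; 8] {
        let mut d = [0i16; 8];
        for (e, &x) in src.iter().enumerate() {
            d[e] = i16::from(x);
        }
        d
    }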

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.8H,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.4S,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.2D,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
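
For comparison, a similar sketch of the unsigned form: USHLL with #0 is simply a zero-extension of each lane. Again, the function name and widths are invented for the example.

    // USHLL Vd.8H,Vn.8B,#0: zero-extend each unsigned lane; the #0
    // immediate means no additional left shift.
    fn ushll_0(src: [u8; 8]) -> [u16; 8] {
        let mut d = [0u16; 8];
        for (e, &x) in src.iter().enumerate() {
            d[e] = u16::from(x);
        }
        d
    }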

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.8H,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.4S,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.2D,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
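
The "2" variants read the upper half of a 128-bit source, which is what the Vpart[n, part] read in the pseudocode expresses. A rough Rust model of SSHLL2 with a zero shift, with names chosen for the sketch:

    // SSHLL2 Vd.8H,Vn.16B,#0: take the upper eight lanes of the source and
    // sign-extend each of them to 16 bits.
    fn sshll2_0(src: [i8; 16]) -> [i16; 8] {
        let mut d = [0i16; 8];
        for e in 0..8 {
            d[e] = i16::from(src[8 + e]);
        }
        d
    }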

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.8H,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.4S,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.2D,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
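
And the unsigned counterpart, again only as an illustrative sketch:

    // USHLL2 Vd.8H,Vn.16B,#0: zero-extend the upper eight lanes of the
    // 128-bit source into eight 16-bit lanes.
    fn ushll2_0(src: [u8; 16]) -> [u16; 8] {
        let mut d = [0u16; 8];
        for e in 0..8 {
            d[e] = u16::from(src[8 + e]);
        }
        d
    }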

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
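
The saturating behaviour is easiest to see in scalar code. Below is a hedged Rust model of a single SQXTN lane (32 bits down to 16); the qc flag stands in for FPSR.QC and the names are ours.

    // One SQXTN lane: clamp a signed 32-bit value into the signed 16-bit
    // range and record whether saturation occurred (models FPSR.QC).
    fn sqxtn_lane(x: i32, qc: &mut bool) -> i16 {
        if x > i32::from(i16::MAX) {
            *qc = true;
            i16::MAX
        } else if x < i32::from(i16::MIN) {
            *qc = true;
            i16::MIN
        } else {
            x as i16
        }
    }

For example, an input of 40000 saturates to 32767 and sets the flag, matching the SatQ call in the pseudocode.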

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
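
The unsigned variant needs only the upper clamp; a sketch under the same assumptions as the SQXTN model above:

    // One UQXTN lane: clamp an unsigned 32-bit value to the unsigned
    // 16-bit range, setting the sticky saturation flag on overflow.
    fn uqxtn_lane(x: u32, qc: &mut bool) -> u16 {
        if x > u32::from(u16::MAX) {
            *qc = true;
            u16::MAX
        } else {
            x as u16
        }
    }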

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Bd,Hn
+

Argument Preparation

a → Hn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Hd,Sn
+

Argument Preparation

a → Sn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64
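
The scalar forms (Bd,Hn / Hd,Sn / Sd,Dn) apply the same saturating narrow to a single element rather than a whole vector. A minimal Rust equivalent of the Hd,Sn case, illustrative only:

    // SQXTN Hd,Sn as a scalar operation: saturate one signed 32-bit value
    // down to signed 16 bits (saturation flagging omitted for brevity).
    fn sqxtn_scalar(x: i32) -> i16 {
        x.clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16
    }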

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Bd,Hn
+

Argument Preparation

a → Hn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Hd,Sn
+

Argument Preparation

a → Sn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64
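
The "2" form combines the saturating narrow with the upper-half write-back shown earlier for XTN2. A hypothetical Rust sketch of SQXTN2 Vd.16B,Vn.8H:

    // SQXTN2: saturate eight 16-bit lanes down to 8 bits and place them in
    // the upper half of the destination; the lower half is kept as-is.
    fn sqxtn2(lower: [i8; 8], src: [i16; 8]) -> [i8; 16] {
        let mut d = [0i8; 16];
        d[..8].copy_from_slice(&lower);
        for (e, &x) in src.iter().enumerate() {
            d[8 + e] = x.clamp(i16::from(i8::MIN), i16::from(i8::MAX)) as i8;
        }
        d
    }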

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
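
SQXTUN differs from SQXTN in that the source is signed but the narrowed result is unsigned, so negative inputs clamp to zero. A minimal Rust model with invented names:

    // One SQXTUN lane: signed 16-bit input, unsigned 8-bit output.
    // Negative values saturate to 0 and large values to 255; either case
    // sets the sticky saturation flag (models FPSR.QC).
    fn sqxtun_lane(x: i16, qc: &mut bool) -> u8 {
        if x < 0 {
            *qc = true;
            0
        } else if x > i16::from(u8::MAX) {
            *qc = true;
            u8::MAX
        } else {
            x as u8
        }
    }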

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Bd,Hn
+

Argument Preparation

a → Hn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Hd,Sn
+

Argument Preparation

a → Sn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
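
Rendering the by-lane MLA pseudocode in plain Rust may help: every lane of the first source is multiplied by one broadcast lane of the second, only the low bits of each product are kept, and the result is added to the accumulator. This is a sketch only; the names and the 16-bit element size are our choice.

    // MLA Vd.4H,Vn.4H,Vm.H[lane]: d[e] += b[e] * v[lane], with the product
    // and the accumulate both truncated to the element width (wrapping
    // arithmetic), matching the <esize-1:0> slice in the pseudocode.
    fn mla_lane_i16(acc: [i16; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> [i16; 4] {
        let mut d = acc;
        for e in 0..4 {
            d[e] = d[e].wrapping_add(b[e].wrapping_mul(v[lane]));
        }
        d
    }

Because only the low esize bits of each product survive, signed and unsigned interpretations give the same result, which is why the reference pseudocode can use UInt for both.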

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
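
The long multiply-accumulate forms widen before accumulating. A rough Rust rendering of SMLAL Vd.4S,Vn.4H,Vm.H[lane], illustrative rather than authoritative:

    // SMLAL by lane: sign-extend each 16-bit lane and the selected lane of
    // v to 32 bits, multiply, and accumulate into the 32-bit destination
    // lanes (the accumulate wraps at 32 bits, as in the pseudocode).
    fn smlal_lane_i16(acc: [i32; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> [i32; 4] {
        let mut d = acc;
        for e in 0..4 {
            d[e] = d[e].wrapping_add(i32::from(b[e]) * i32::from(v[lane]));
        }
        d
    }

The unsigned UMLAL form is identical except that the widening uses zero-extension.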

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
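
The "2" (high) forms such as SMLAL2 differ from the base forms only in that the first source is read from the upper half of a 128-bit register (Vpart[n, part] with part = 1 in the pseudocode). A minimal scalar sketch of that selection, again with an invented name and array-based signature rather than the real intrinsic API:

// Scalar sketch of SMLAL2 Vd.4S, Vn.8H, Vm.H[lane]: the high form
// accumulates the upper four elements of b (indices 4..8).
fn smlal_high_lane_s16(a: [i32; 4], b: [i16; 8], v: [i16; 4], lane: usize) -> [i32; 4] {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut result = a;
    for e in 0..4 {
        let product = (b[4 + e] as i32).wrapping_mul(v[lane] as i32);
        result[e] = result[e].wrapping_add(product);
    }
    result
}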

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
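
The saturating SQDMLAL form above can be modelled in scalar Rust as follows (an illustrative sketch under the assumption of 16-bit elements; the name, the array signature, and the returned bool standing in for the sticky FPSR.QC flag are all invented for clarity):

// Scalar sketch of SQDMLAL Vd.4S, Vn.4H, Vm.H[lane]. Both the doubled
// product and the accumulation saturate to the signed 32-bit range,
// mirroring the two SignedSatQ calls in the pseudocode.
fn sqdmlal_lane_s16(a: [i32; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> ([i32; 4], bool) {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut qc = false;
    let mut result = a;
    for e in 0..4 {
        let wide = 2 * (b[e] as i64) * (v[lane] as i64);
        let product = wide.clamp(i32::MIN as i64, i32::MAX as i64);
        qc |= product != wide;
        let sum = result[e] as i64 + product;
        let acc = sum.clamp(i32::MIN as i64, i32::MAX as i64);
        qc |= acc != sum;
        result[e] = acc as i32;
    }
    (result, qc)
}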

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.4H
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.8H
+0 <= lane <= 7

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
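
The non-widening MLS form above keeps element and accumulator widths equal and wraps the low esize bits of the product. A minimal scalar sketch, assuming 16-bit elements (the function name and array signature are invented, not the real intrinsic API):

// Scalar sketch of MLS Vd.4H, Vn.4H, Vm.H[lane]: multiply each element of b
// by the selected lane of v and subtract the low 16 bits of the product
// from the accumulator, wrapping modulo 2^16.
fn mls_lane_u16(a: [u16; 4], b: [u16; 4], v: [u16; 4], lane: usize) -> [u16; 4] {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut result = a;
    for e in 0..4 {
        result[e] = result[e].wrapping_sub(b[e].wrapping_mul(v[lane]));
    }
    result
}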

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64
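
The floating-point by-lane variants above are given only as the RESULT[i] formula rather than a pseudocode block. A minimal scalar sketch of the two-element case, with an invented name and array-based signature:

// Scalar sketch of the two-element floating-point multiply-subtract by lane:
// RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1.
fn mls_lane_f32(a: [f32; 2], b: [f32; 2], v: [f32; 2], lane: usize) -> [f32; 2] {
    assert!(lane <= 1); // 0 <= lane <= 1
    [a[0] - b[0] * v[lane], a[1] - b[1] * v[lane]]
}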

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
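
The widening multiply-subtract long form above is the sub_op counterpart of the SMLAL/UMLAL pseudocode: the doubled-width product is subtracted from, rather than added to, the accumulator. A minimal scalar sketch, assuming signed 16-bit elements and a 32-bit accumulator (invented name and array signature, not the real intrinsic API):

// Scalar sketch of SMLSL Vd.4S, Vn.4H, Vm.H[lane]: widen, multiply by the
// selected lane, and subtract from the 32-bit accumulator, wrapping.
fn smlsl_lane_s16(a: [i32; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> [i32; 4] {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut result = a;
    for e in 0..4 {
        let product = (b[e] as i32).wrapping_mul(v[lane] as i32);
        result[e] = result[e].wrapping_sub(product);
    }
    result
}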

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
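
The SMLSL Vd.4S,Vn.4H,Vm.H[lane] form with a 128-bit index vector (v → Vm.8H) looks like the ACLE intrinsic vmlsl_laneq_s16; a minimal sketch under that assumption:

#include <arm_neon.h>

/* acc[i] -= (int32_t)b[i] * (int32_t)v[5]  -- SMLSL Vd.4S,Vn.4H,Vm.H[5] */
int32x4_t smlsl_by_laneq_example(int32x4_t acc, int16x4_t b, int16x8_t v) {
    return vmlsl_laneq_s16(acc, b, v, 5); /* lane must be a constant in 0..7 */
}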

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
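
If the SQDMLSL Vd.4S,Vn.4H,Vm.H[lane] entry above corresponds to the ACLE intrinsic vqdmlsl_lane_s16, the saturating behaviour in the pseudocode can be exercised from C roughly as follows (wrapper name is illustrative):

#include <arm_neon.h>

/* acc[i] = sat32(acc[i] - sat32(2 * b[i] * v[3])); saturation sets FPSR.QC */
int32x4_t sqdmlsl_by_lane_example(int32x4_t acc, int16x4_t b, int16x4_t v) {
    return vqdmlsl_lane_s16(acc, b, v, 3); /* lane must be a constant in 0..3 */
}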

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.4H
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
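
The scalar SQDMLSL Sd,Hn,Vm.H[lane] form appears to be the ACLE intrinsic vqdmlslh_lane_s16; a minimal sketch under that assumption:

#include <arm_neon.h>
#include <stdint.h>

/* acc = sat32(acc - sat32(2 * b * v[0])), one 16-bit element widened to 32 bits */
int32_t sqdmlslh_by_lane_example(int32_t acc, int16_t b, int16x4_t v) {
    return vqdmlslh_lane_s16(acc, b, v, 0); /* lane must be a constant in 0..3 */
}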

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.8H
+0 <= lane <= 7

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
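
The MUL Vd.4H,Vn.4H,Vm.H[0] form above multiplies every element by one scalar value; assuming it corresponds to the ACLE intrinsic vmul_n_s16, a minimal C sketch:

#include <arm_neon.h>

/* result[i] = (int16_t)(a[i] * b), keeping the low 16 bits of each product */
int16x4_t mul_by_scalar_example(int16x4_t a, int16_t b) {
    return vmul_n_s16(a, b);
}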

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
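
Assuming the FMUL Vd.2S,Vn.2S,Vm.S[0] entry corresponds to the ACLE intrinsic vmul_n_f32, a minimal C sketch (rounding follows FPCR, as in the pseudocode):

#include <arm_neon.h>

/* result[i] = a[i] * b for both 32-bit float lanes */
float32x2_t fmul_by_scalar_example(float32x2_t a, float b) {
    return vmul_n_f32(a, b);
}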

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[0]
+

Argument Preparation

a → Dn 
+b → Vm.D[0]

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.D[0]
+

Argument Preparation

a → Vn.2D 
+b → Vm.D[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
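
The by-lane MUL Vd.4H,Vn.4H,Vm.H[lane] form looks like the ACLE intrinsic vmul_lane_s16; a minimal sketch under that assumption (wrapper name is illustrative):

#include <arm_neon.h>

/* result[i] = (int16_t)(a[i] * v[2])  -- MUL Vd.4H,Vn.4H,Vm.H[2] */
int16x4_t mul_by_lane_example(int16x4_t a, int16x4_t v) {
    return vmul_lane_s16(a, v, 2); /* lane must be a constant in 0..3 */
}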

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
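
Assuming the FMUL Vd.2S,Vn.2S,Vm.S[lane] entry maps to the ACLE intrinsic vmul_lane_f32, a hedged C sketch:

#include <arm_neon.h>

/* result[i] = a[i] * v[1], rounded according to FPCR */
float32x2_t fmul_by_lane_example(float32x2_t a, float32x2_t v) {
    return vmul_lane_f32(a, v, 1); /* lane must be a constant in 0..1 */
}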

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
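
The scalar FMUL Sd,Sn,Vm.S[lane] form appears to be the ACLE intrinsic vmuls_lane_f32; a minimal sketch under that assumption:

#include <arm_neon.h>

/* result = a * v[1], one 32-bit float multiplied by a selected lane of v */
float fmuls_by_lane_example(float a, float32x2_t v) {
    return vmuls_lane_f32(a, v, 1); /* lane must be a constant in 0..1 */
}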

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
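
The floating-point form follows the same by-element pattern; a rough host-float model of FMUL Vd.2S,Vn.2S,Vm.S[lane] is sketched below. It is an approximation only: real hardware applies the FPCR rounding and flush-to-zero controls referenced by FPMul, which plain f32 arithmetic does not reproduce. The helper name is an assumption for illustration.

fn fmul_lane_f32x2(a: [f32; 2], v: [f32; 4], lane: usize) -> [f32; 2] {
    // Constraint from the instruction form: 0 <= lane <= 3 for a 4S second source.
    assert!(lane < 4);
    let b = v[lane];
    // Each element of the first source is multiplied by the selected element.
    [a[0] * b, a[1] * b]
}

fn main() {
    assert_eq!(fmul_lane_f32x2([1.5, -2.0], [0.0, 4.0, 8.0, 16.0], 1), [6.0, -8.0]);
}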

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
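
A standalone Rust sketch of the widening multiply behind SMULL Vd.4S,Vn.4H,Vm.H[0]: each signed 16-bit element is multiplied by the selected 16-bit scalar and the full 32-bit product is kept, mirroring the Elem[result, e, 2*esize] write in the pseudocode above. The helper name is assumed for illustration.

fn smull_n_i16x4(a: [i16; 4], b: i16) -> [i32; 4] {
    let mut out = [0i32; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Widen both operands first so the product cannot wrap.
        *o = (*x as i32) * (b as i32);
    }
    out
}

fn main() {
    assert_eq!(smull_n_i16x4([30_000, -30_000, 2, 3], 3), [90_000, -90_000, 6, 9]);
}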

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
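
The unsigned form differs only in how the 16-bit inputs are interpreted; here is a sketch of UMULL Vd.4S,Vn.4H,Vm.H[0] (helper name assumed for illustration).

fn umull_n_u16x4(a: [u16; 4], b: u16) -> [u32; 4] {
    let mut out = [0u32; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Full 32-bit product of two unsigned 16-bit values; no truncation.
        *o = (*x as u32) * (b as u32);
    }
    out
}

fn main() {
    assert_eq!(umull_n_u16x4([65_535, 2, 3, 4], 2), [131_070, 4, 6, 8]);
}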

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
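
For the "2" variants, Vpart[n, part] with part = 1 selects the upper half of the first source before the same widening multiply is applied; a sketch of SMULL2 Vd.4S,Vn.8H,Vm.H[0] follows (helper name and array shapes are assumptions for illustration).

fn smull2_n_i16x8(a: [i16; 8], b: i16) -> [i32; 4] {
    // Vpart[n, 1]: the upper four elements of the 8x16-bit source.
    let upper = &a[4..8];
    let mut out = [0i32; 4];
    for (o, x) in out.iter_mut().zip(upper.iter()) {
        *o = (*x as i32) * (b as i32);
    }
    out
}

fn main() {
    assert_eq!(smull2_n_i16x8([9, 9, 9, 9, 1, 2, 3, 4], 10), [10, 20, 30, 40]);
}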

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
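
A standalone sketch of the saturating doubling multiply long, SQDMULL Vd.4S,Vn.4H,Vm.H[0]: widen, multiply, double, then saturate to 32 bits. For 16-bit inputs the only product whose doubled value overflows is -32768 * -32768, which clamps to i32::MAX and would set FPSR.QC (modelled here as a returned flag). The helper name is assumed for illustration.

fn sqdmull_n_i16x4(a: [i16; 4], b: i16) -> ([i32; 4], bool) {
    let mut qc = false; // stands in for FPSR.QC
    let mut out = [0i32; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Compute 2 * element1 * element2 in i64 so the intermediate never wraps.
        let doubled = 2 * (*x as i64) * (b as i64);
        let sat = doubled.clamp(i32::MIN as i64, i32::MAX as i64);
        if sat != doubled {
            qc = true;
        }
        *o = sat as i32;
    }
    (out, qc)
}

fn main() {
    let (r, qc) = sqdmull_n_i16x4([i16::MIN, 1, 2, 3], i16::MIN);
    assert_eq!(r, [i32::MAX, -65_536, -131_072, -196_608]);
    assert!(qc);
}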

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.4H
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.8H
+0 <= lane <= 7

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
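
A standalone sketch of SQDMULH Vd.4H,Vn.4H,Vm.H[0]: double the widened product, keep its most significant 16 bits (product >> esize), and saturate. The rounding constant in the shared pseudocode is zero for SQDMULH; SQRDMULH adds 1 << (esize - 1) before the shift. The helper name is assumed for illustration.

fn sqdmulh_n_i16x4(a: [i16; 4], b: i16) -> [i16; 4] {
    let mut out = [0i16; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Use i64 so 2 * (-32768) * (-32768) cannot overflow the intermediate.
        let product = 2 * (*x as i64) * (b as i64);
        let high = product >> 16; // most significant half of the doubled product
        *o = high.clamp(i16::MIN as i64, i16::MAX as i64) as i16;
    }
    out
}

fn main() {
    // 2 * 16384 * 16384 = 2^29; the high half (>> 16) is 2^13 = 8192.
    assert_eq!(sqdmulh_n_i16x4([16_384, -16_384, 0, 1], 16_384), [8_192, -8_192, 0, 0]);
    // The single saturating case: both inputs at i16::MIN.
    assert_eq!(sqdmulh_n_i16x4([i16::MIN; 4], i16::MIN), [i16::MAX; 4]);
}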

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.4H
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.8H
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
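
The rounding variant differs from SQDMULH only in the round_const term: 1 << (esize - 1) is added to the doubled product before the high half is taken. A minimal Rust model for 16-bit lanes, again with a hypothetical helper name, might look like:

// Minimal scalar model of one SQRDMULH element step for 16-bit lanes.
// Hypothetical helper for illustration only, not a stdarch API.
fn sqrdmulh_i16(a: i16, b: i16) -> (i16, bool) {
    let round_const = 1i64 << 15; // 1 << (esize - 1), since rounding is true
    let product = 2 * (a as i64) * (b as i64) + round_const;
    let shifted = product >> 16;  // product >> esize
    if shifted > i16::MAX as i64 {
        (i16::MAX, true)          // saturated: FPSR.QC would be set
    } else if shifted < i16::MIN as i64 {
        (i16::MIN, true)
    } else {
        (shifted as i16, false)
    }
}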

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
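
For the by-lane forms, the selected element of the second source is broadcast against every lane of the first. Reusing the hypothetical sqrdmulh_i16 helper sketched earlier, a model of the Vd.4H,Vn.4H,Vm.H[lane] case could be:

// Minimal model of the by-lane form: one selected lane of `v` is combined
// with every lane of `a`. Reuses the hypothetical sqrdmulh_i16 helper above.
fn sqrdmulh_lane_i16x4(a: [i16; 4], v: [i16; 4], lane: usize) -> ([i16; 4], bool) {
    assert!(lane <= 3);                    // 0 <= lane <= 3
    let b = v[lane];
    let mut out = [0i16; 4];
    let mut qc = false;                    // sticky saturation flag (FPSR.QC)
    for e in 0..4 {
        let (r, sat) = sqrdmulh_i16(a[e], b);
        out[e] = r;
        qc |= sat;
    }
    (out, qc)
}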

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.4H
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.8H
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
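
Unlike the saturating forms above, MLA truncates the product to the element size and accumulates modularly, so no saturation flag is involved. A minimal Rust model for one 16-bit lane, with a hypothetical helper name, could be:

// Minimal scalar model of one MLA element step for 16-bit lanes: the
// product is truncated to esize bits and added with wrapping arithmetic.
// The same bit pattern results for signed lanes (two's complement).
// Hypothetical helper for illustration only, not a stdarch API.
fn mla_u16(acc: u16, a: u16, b: u16) -> u16 {
    let product = (a as u32).wrapping_mul(b as u32) as u16; // (...)<esize-1:0>
    acc.wrapping_add(product)                               // operand3 + product
}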

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64
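
These two entries give only the arithmetic, RESULT[i] = a[i] + (b[i] * c), without naming an element type; the sketch below assumes f32 purely for illustration, and the function name is hypothetical.

// Generic model of RESULT[i] = a[i] + (b[i] * c). The element type is not
// stated above; f32 is assumed here only to make the example concrete.
fn mla_by_scalar(a: &[f32], b: &[f32], c: f32) -> Vec<f32> {
    a.iter().zip(b).map(|(&x, &y)| x + y * c).collect()
}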

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
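
For the long (widening) forms, the narrow inputs are sign-extended, multiplied into a double-width product, and accumulated into the double-width destination lane without saturation. A minimal Rust model for one 16-bit input lane, with a hypothetical helper name:

// Minimal scalar model of one SMLAL element step: sign-extend the 16-bit
// inputs, form the 32-bit product, and accumulate modularly into the
// 32-bit destination lane. Hypothetical helper, not a stdarch API.
fn smlal_i16(acc: i32, a: i16, b: i16) -> i32 {
    let product = (a as i32) * (b as i32); // (element1 * element2)<2*esize-1:0>
    acc.wrapping_add(product)              // Elem[operand3] + product, no saturation
}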

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
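
The "2" forms differ only in which half of the narrow source they read: Vpart[n, part] selects the upper half, so SMLAL2 Vd.4S,Vn.8H,Vm.H[0] consumes lanes 4..7 of Vn. A small model, reusing the hypothetical smlal_i16 helper sketched earlier:

// Minimal model of SMLAL2 Vd.4S,Vn.8H,Vm.H[0]: only the upper four 16-bit
// lanes of `a` take part. Reuses the hypothetical smlal_i16 helper above.
fn smlal2_i16x8(acc: [i32; 4], a: [i16; 8], b: i16) -> [i32; 4] {
    let mut out = acc;
    for e in 0..4 {
        out[e] = smlal_i16(out[e], a[4 + e], b); // upper half of Vn.8H
    }
    out
}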

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
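
SQDMLAL combines the widening accumulate with two saturation points: the doubled product is saturated to the double-width element, and the accumulation is saturated again, with FPSR.QC set if either step saturates. A minimal Rust model for 16-bit inputs, using hypothetical helper names:

// Minimal scalar model of one SQDMLAL element step for 16-bit inputs.
// Hypothetical helpers for illustration only, not stdarch APIs.
fn sqdmlal_i16(acc: i32, a: i16, b: i16) -> (i32, bool) {
    let (product, sat1) = saturate_i32(2 * (a as i64) * (b as i64));
    let (result, sat2) = saturate_i32(acc as i64 + product as i64);
    (result, sat1 || sat2) // either saturation would set FPSR.QC
}

// Clamp a 64-bit value into the i32 range, reporting whether it saturated.
fn saturate_i32(x: i64) -> (i32, bool) {
    if x > i32::MAX as i64 {
        (i32::MAX, true)
    } else if x < i32::MIN as i64 {
        (i32::MIN, true)
    } else {
        (x as i32, false)
    }
}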

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
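
MLS mirrors MLA with the product subtracted from the accumulator instead of added; the arithmetic is still modular, with no saturation. A minimal Rust model for one 16-bit lane, with a hypothetical helper name:

// Minimal scalar model of one MLS element step for 16-bit lanes: the
// truncated product is subtracted with wrapping arithmetic.
// Hypothetical helper, not a stdarch API.
fn mls_u16(acc: u16, a: u16, b: u16) -> u16 {
    let product = (a as u32).wrapping_mul(b as u32) as u16; // (...)<esize-1:0>
    acc.wrapping_sub(product)                               // operand3 - product
}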

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64
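
As with the multiply-add entries earlier, these two give only the arithmetic, RESULT[i] = a[i] - (b[i] * c), with no element type; f32 is assumed below purely for illustration, and the function name is hypothetical.

// Generic model of RESULT[i] = a[i] - (b[i] * c). The element type is not
// stated above; f32 is assumed here only to make the example concrete.
fn mls_by_scalar(a: &[f32], b: &[f32], c: f32) -> Vec<f32> {
    a.iter().zip(b).map(|(&x, &y)| x - y * c).collect()
}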

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
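
SMLSL is the subtracting counterpart of SMLAL: the sign-extended product is subtracted from the double-width accumulator, again without saturation. A one-lane Rust model with a hypothetical helper name:

// Minimal scalar model of one SMLSL element step: subtract the widened
// product from the 32-bit accumulator with wrapping arithmetic.
// Hypothetical helper, not a stdarch API.
fn smlsl_i16(acc: i32, a: i16, b: i16) -> i32 {
    acc.wrapping_sub((a as i32) * (b as i32))
}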

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
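
The unsigned form follows the same shape. A small hedged Rust sketch of one UMLSL lane (names are illustrative, not the actual intrinsic):

// Scalar model of one UMLSL lane: all values are unsigned, the product is
// widened before the wrapping subtraction from the accumulator.
fn umlsl_lane_u32(acc: u64, a: u32, b_lane: u32) -> u64 {
    acc.wrapping_sub(a as u64 * b_lane as u64)
}

fn main() {
    assert_eq!(umlsl_lane_u32(100, 7, 3), 79); // 100 - (7 * 3)
}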

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
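
The "2" suffix selects the upper half of the sources, which is what `part` does in the pseudocode. An illustrative Rust sketch of SMLSL2 over a 4-lane source (the array layout and names are assumptions for clarity, not the stdarch types):

// Scalar model of SMLSL2: only the upper half of `a` (lanes 2 and 3) is
// widened, multiplied by the selected element, and subtracted.
fn smlsl2_lane_s32(acc: [i64; 2], a: [i32; 4], b_lane: i32) -> [i64; 2] {
    [
        acc[0].wrapping_sub(a[2] as i64 * b_lane as i64),
        acc[1].wrapping_sub(a[3] as i64 * b_lane as i64),
    ]
}

fn main() {
    assert_eq!(smlsl2_lane_s32([10, 20], [1, 2, 3, 4], 2), [4, 12]);
}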

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
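
UMLSL2 is the unsigned counterpart of the sketch above; the only change is the element type. A hedged Rust sketch under the same assumed layout:

// Scalar model of UMLSL2: upper half of `a`, unsigned widening multiply,
// wrapping subtraction from the 64-bit accumulators.
fn umlsl2_lane_u32(acc: [u64; 2], a: [u32; 4], b_lane: u32) -> [u64; 2] {
    [
        acc[0].wrapping_sub(a[2] as u64 * b_lane as u64),
        acc[1].wrapping_sub(a[3] as u64 * b_lane as u64),
    ]
}

fn main() {
    assert_eq!(umlsl2_lane_u32([10, 20], [1, 2, 3, 4], 2), [4, 12]);
}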

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
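
The doubling and the two saturation points in the Operation above are the interesting part of SQDMLSL. A minimal scalar Rust sketch of one 16-bit lane, with a boolean standing in for FPSR.QC (the names are illustrative, not the intrinsic API):

// Scalar model of one SQDMLSL lane (16-bit inputs, 32-bit accumulator).
fn sqdmlsl_lane_s16(acc: i32, a: i16, b_lane: i16) -> (i32, bool) {
    let sat = |v: i64| -> (i32, bool) {
        if v > i32::MAX as i64 {
            (i32::MAX, true)
        } else if v < i32::MIN as i64 {
            (i32::MIN, true)
        } else {
            (v as i32, false)
        }
    };
    let (product, sat1) = sat(2 * a as i64 * b_lane as i64); // doubled product
    let (result, sat2) = sat(acc as i64 - product as i64);   // subtract, saturate
    (result, sat1 || sat2) // the flag models FPSR.QC
}

fn main() {
    // Only i16::MIN * i16::MIN makes the doubled product overflow 32 bits.
    assert_eq!(sqdmlsl_lane_s16(0, i16::MIN, i16::MIN), (i32::MIN + 1, true));
}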

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
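
SQDMLSL2 repeats the same saturating arithmetic on the upper half of the sources. A compact hedged sketch of one lane, assuming the upper-half element has already been selected:

// Scalar model of one SQDMLSL2 lane; `a_upper` is a lane from the upper half.
fn sqdmlsl2_lane_s16(acc: i32, a_upper: i16, b_lane: i16) -> i32 {
    let clamp = |v: i64| v.clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    let product = clamp(2 * a_upper as i64 * b_lane as i64);
    clamp(acc as i64 - product as i64)
}

fn main() {
    assert_eq!(sqdmlsl2_lane_s16(10, 3, 2), -2); // 10 - (2 * 3 * 2)
}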

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
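
A point worth noting in the ABS pseudocode is that the result is truncated to the element size rather than saturated, so the most negative value maps to itself. A one-lane Rust sketch (illustrative, not the intrinsic itself):

// Scalar model of ABS on one 8-bit lane: plain two's-complement absolute
// value, so i8::MIN wraps back to i8::MIN (no saturation here).
fn abs_lane_s8(x: i8) -> i8 {
    x.wrapping_abs()
}

fn main() {
    assert_eq!(abs_lane_s8(-5), 5);
    assert_eq!(abs_lane_s8(i8::MIN), i8::MIN); // 0x80 truncates back to itself
}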

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
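
FPAbs simply clears the sign bit, including for NaNs and signed zeros. A minimal single-lane Rust sketch of that behaviour (the name is illustrative):

// Scalar model of FABS on one single-precision lane: clear the sign bit.
fn fabs_lane_f32(x: f32) -> f32 {
    f32::from_bits(x.to_bits() & 0x7fff_ffff)
}

fn main() {
    assert_eq!(fabs_lane_f32(-2.5), 2.5);
    assert!(fabs_lane_f32(-0.0).is_sign_positive());
}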

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Bd,Bn
+

Argument Preparation

a → Bn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Hd,Hn
+

Argument Preparation

a → Hn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
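
Unlike plain ABS, the SQABS pseudocode saturates the result and sets FPSR.QC when it does. A hedged one-lane Rust sketch, with a boolean standing in for the QC flag:

// Scalar model of SQABS on one 8-bit lane: like ABS, but the most negative
// value saturates to the most positive one instead of wrapping.
fn sqabs_lane_s8(x: i8) -> (i8, bool) {
    match x.checked_abs() {
        Some(v) => (v, false),
        None => (i8::MAX, true), // only i8::MIN saturates and would set FPSR.QC
    }
}

fn main() {
    assert_eq!(sqabs_lane_s8(-5), (5, false));
    assert_eq!(sqabs_lane_s8(i8::MIN), (i8::MAX, true));
}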

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
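
NEG shares its pseudocode with ABS, taking the `neg` branch, and likewise truncates rather than saturates. A one-lane Rust sketch (illustrative only):

// Scalar model of NEG on one 8-bit lane: two's-complement negation, so the
// most negative value wraps back to itself.
fn neg_lane_s8(x: i8) -> i8 {
    x.wrapping_neg()
}

fn main() {
    assert_eq!(neg_lane_s8(5), -5);
    assert_eq!(neg_lane_s8(i8::MIN), i8::MIN); // wraps, no saturation
}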

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
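
FPNeg flips only the sign bit, so NaN payloads and zeros keep their magnitude. A minimal single-lane Rust sketch of that bit manipulation (the name is illustrative):

// Scalar model of FNEG on one single-precision lane: toggle the sign bit.
fn fneg_lane_f32(x: f32) -> f32 {
    f32::from_bits(x.to_bits() ^ 0x8000_0000)
}

fn main() {
    assert_eq!(fneg_lane_f32(2.5), -2.5);
    assert!(fneg_lane_f32(0.0).is_sign_negative());
}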

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Bd,Bn
+

Argument Preparation

a → Bn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Hd,Hn
+

Argument Preparation

a → Hn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
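
As with SQABS, the distinguishing detail of SQNEG is the saturating treatment of the most negative input and the QC side effect. A hedged one-lane Rust sketch:

// Scalar model of SQNEG on one 8-bit lane: negation that saturates i8::MIN
// to i8::MAX instead of wrapping.
fn sqneg_lane_s8(x: i8) -> (i8, bool) {
    match x.checked_neg() {
        Some(v) => (v, false),
        None => (i8::MAX, true), // only i8::MIN saturates and would set FPSR.QC
    }
}

fn main() {
    assert_eq!(sqneg_lane_s8(5), (-5, false));
    assert_eq!(sqneg_lane_s8(i8::MIN), (i8::MAX, true));
}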

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64
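
CountLeadingSignBits counts the run of bits below the sign bit that equal it, excluding the sign bit itself. One way to model a single 8-bit lane in Rust (a sketch, not the intrinsic):

// Scalar model of CLS on one 8-bit lane: XOR with the sign-extended sign bit
// turns "copies of the sign bit" into leading zeros; subtract one so the
// sign bit itself is not counted.
fn cls_lane_s8(x: i8) -> u32 {
    ((x ^ (x >> 7)) as u8).leading_zeros() - 1
}

fn main() {
    assert_eq!(cls_lane_s8(1), 6);
    assert_eq!(cls_lane_s8(-1), 7);
    assert_eq!(cls_lane_s8(0), 7);
}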

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64
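
A scalar sketch of the per-byte population count, assuming u8 lanes; count_ones plays the role of BitCount in the pseudocode:

    fn main() {
        let lanes: [u8; 8] = [0x00, 0x01, 0x03, 0x0F, 0xFF, 0xAA, 0x55, 0x80];
        let pop: Vec<u32> = lanes.iter().map(|b| b.count_ones()).collect();
        println!("{:?}", pop); // [0, 1, 2, 4, 8, 4, 4, 1]
    }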

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URECPE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRecipEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URECPE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRecipEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
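
A rough scalar picture of what the lane loop computes; plain division stands in for the table-based FPRecipEstimate, which returns only a low-precision approximation of 1/x. This is a sketch, not the intrinsic:

    fn main() {
        let lanes = [2.0f32, 0.5];
        // Hardware returns an estimate of each of these values.
        let recip: Vec<f32> = lanes.iter().map(|&x| 1.0 / x).collect();
        println!("{:?}", recip); // [0.5, 2.0]
    }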

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
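
FPRecipStepFused(a, b) evaluates 2.0 - a*b as a fused operation. Paired with an FRECPE estimate it forms one Newton-Raphson refinement of a reciprocal, r' = r * (2 - x*r). A scalar, unfused sketch under that reading:

    // Unfused stand-in for FPRecipStepFused.
    fn recip_step(a: f32, b: f32) -> f32 {
        2.0 - a * b
    }

    fn main() {
        let x = 3.0f32;
        let mut r = 0.3f32;        // coarse starting estimate of 1/3 (the role FRECPE plays)
        r *= recip_step(x, r);     // one Newton-Raphson refinement
        r *= recip_step(x, r);     // a second refinement
        println!("{}", r);         // ~0.33333
    }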

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
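
Unlike the estimate instructions, FSQRT computes the square root itself for each lane; a scalar sketch with f32::sqrt:

    fn main() {
        let lanes = [4.0f32, 2.0, 0.25, 9.0];
        let roots: Vec<f32> = lanes.iter().map(|x| x.sqrt()).collect();
        println!("{:?}", roots); // [2.0, 1.4142135, 0.5, 3.0]
    }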

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

URSQRTE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRSqrtEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

URSQRTE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRSqrtEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
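
A scalar sketch of the per-lane result, reading FPRSqrtEstimate as an approximation of 1/sqrt(x); exact arithmetic stands in for the low-precision hardware estimate:

    fn main() {
        let lanes = [4.0f32, 2.0];
        // Hardware returns an estimate of each of these values.
        let est: Vec<f32> = lanes.iter().map(|&x| 1.0 / x.sqrt()).collect();
        println!("{:?}", est); // [0.5, 0.70710677]
    }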

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
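
FPRSqrtStepFused(a, b) evaluates (3.0 - a*b) / 2.0 as a fused operation. Combined with an FRSQRTE estimate it gives one Newton-Raphson refinement of 1/sqrt(x), r' = r * (3 - x*r*r) / 2. A scalar, unfused sketch:

    // Unfused stand-in for FPRSqrtStepFused.
    fn rsqrt_step(a: f32, b: f32) -> f32 {
        (3.0 - a * b) / 2.0
    }

    fn main() {
        let x = 2.0f32;
        let mut r = 0.7f32;              // rough starting estimate of 1/sqrt(2) (the role FRSQRTE plays)
        r *= rsqrt_step(x, r * r);       // one Newton-Raphson refinement
        r *= rsqrt_step(x, r * r);       // a second refinement
        println!("{}", r);               // ~0.7071
    }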

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64
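
The referenced NOT pseudocode amounts to a bitwise complement of every lane; a scalar sketch over u8 lanes, illustrative only:

    fn main() {
        let lanes: [u8; 8] = [0x00, 0xFF, 0x0F, 0xF0, 0xAA, 0x55, 0x01, 0x80];
        let inv: Vec<u8> = lanes.iter().map(|&b| !b).collect();
        println!("{:X?}", inv); // [FF, 0, F0, F, 55, AA, FE, 7F]
    }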

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64
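
A scalar sketch of the whole-register bitwise AND, modelled here as u8 lanes (the arrangement only fixes the register width, the operation is bit-for-bit):

    fn main() {
        let a: [u8; 8] = [0xFF, 0x0F, 0xAA, 0x55, 0xF0, 0x81, 0x3C, 0x00];
        let b: [u8; 8] = [0x0F, 0xFF, 0x0F, 0xFF, 0x33, 0x7E, 0xFF, 0xFF];
        let and: Vec<u8> = a.iter().zip(b.iter()).map(|(x, y)| x & y).collect();
        println!("{:X?}", and); // [F, F, A, 55, 30, 0, 3C, 0]
    }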

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
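
The same shape for the bitwise OR, again modelled as u8 lanes:

    fn main() {
        let a: [u8; 8] = [0xF0, 0x0F, 0xAA, 0x00, 0x12, 0x80, 0x3C, 0x01];
        let b: [u8; 8] = [0x0F, 0x0F, 0x55, 0x00, 0x34, 0x7F, 0xC3, 0x10];
        let orr: Vec<u8> = a.iter().zip(b.iter()).map(|(x, y)| x | y).collect();
        println!("{:X?}", orr); // [FF, F, FF, 0, 36, FF, FF, 11]
    }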

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
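
For illustration, a minimal Rust sketch of the 64-bit EOR form above, assuming veor_u8 from core::arch::aarch64 is the intrinsic that lowers to EOR Vd.8B,Vn.8B,Vm.8B; that mapping is an assumption of the sketch.

#[cfg(target_arch = "aarch64")]
fn eor_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: veor_u8 lowers to EOR Vd.8B,Vn.8B,Vm.8B.
        let a = vdup_n_u8(0b1111_0000);
        let b = vdup_n_u8(0b1010_1010);
        let r = veor_u8(a, b); // exclusive OR, lane by lane
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b0101_1010
        out
    }
}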

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64
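
For illustration, a minimal Rust sketch of the 64-bit BIC form above, assuming vbic_u8 from core::arch::aarch64 computes the first operand AND the complement of the second, as the Operation pseudocode describes.

#[cfg(target_arch = "aarch64")]
fn bic_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vbic_u8(a, b) computes a AND NOT b,
        // i.e. the BIC Vd.8B,Vn.8B,Vm.8B form described above.
        let a = vdup_n_u8(0b1111_1111);
        let b = vdup_n_u8(0b0000_1111);
        let r = vbic_u8(a, b); // clears the bits that are set in b
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b1111_0000
        out
    }
}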

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
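
For illustration, a minimal Rust sketch of the 64-bit ORN form above, assuming vorn_u8 from core::arch::aarch64 computes the first operand OR the complement of the second; the intrinsic choice is an assumption of the sketch.

#[cfg(target_arch = "aarch64")]
fn orn_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vorn_u8(a, b) computes a OR NOT b,
        // matching the ORN Vd.8B,Vn.8B,Vm.8B form described above.
        let a = vdup_n_u8(0b0000_0001);
        let b = vdup_n_u8(0b0000_1111);
        let r = vorn_u8(a, b); // 0b0000_0001 | !0b0000_1111
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b1111_0001
        out
    }
}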

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
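
For illustration, a minimal Rust sketch of the 64-bit BSL form above, assuming vbsl_u8 from core::arch::aarch64 takes the selection mask as its first argument, mirroring the mask that BSL reads from the destination register; that mapping is an assumption of the sketch.

#[cfg(target_arch = "aarch64")]
fn bsl_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vbsl_u8(mask, b, c) maps onto
        // BSL Vd.8B,Vn.8B,Vm.8B with the mask pre-loaded into Vd.
        let mask = vdup_n_u8(0b1111_0000);
        let b = vdup_n_u8(0xAA); // taken where the mask bit is 1
        let c = vdup_n_u8(0x55); // taken where the mask bit is 0
        let r = vbsl_u8(mask, b, c);
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b1010_0101
        out
    }
}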

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 <= lane1 <= 7
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64
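
For illustration, a minimal Rust sketch in the spirit of this entry's Operation pseudocode (insert one element and leave the other lanes untouched); the use of vset_lane_u8 from core::arch::aarch64 is an assumption of the sketch, not an intrinsic named by this entry.

#[cfg(target_arch = "aarch64")]
fn ins_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vset_lane_u8 inserts a scalar into one
        // lane of a vector while preserving the remaining lanes, which is the
        // behaviour the Operation pseudocode above describes.
        let v = vdup_n_u8(0);
        let r = vset_lane_u8::<3>(0xFF, v); // write 0xFF into lane 3
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // out == [0, 0, 0, 0xFF, 0, 0, 0, 0]
        out
    }
}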

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 <= lane1 <= 15
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 <= lane1 <= 3
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 <= lane1 <= 7
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 <= lane1 <= 1
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 <= lane1 <= 3
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 <= lane1 <= 0
+b → Vn.1D
+0 <= lane2 <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64
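
For illustration, a minimal Rust sketch in the spirit of this entry's description (duplicate a general-purpose value into a SIMD&FP register); the use of vdup_n_u64 from core::arch::aarch64 is an assumption of the sketch, and the instruction text shown above is the element-indexed DUP form.

#[cfg(target_arch = "aarch64")]
fn dup_example() -> u64 {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vdup_n_u64 broadcasts a general-purpose
        // value into a 64-bit SIMD&FP register, as the description above outlines.
        let v = vdup_n_u64(0xDEAD_BEEF_DEAD_BEEF);
        let mut out = [0u64; 1];
        vst1_u64(out.as_mut_ptr(), v);
        out[0]
    }
}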

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 <= lane1 <= 1
+b → Vn.1D
+0 <= lane2 <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 <= lane1 <= 7
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 <= lane1 <= 15
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 <= lane1 <= 3
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 <= lane1 <= 7
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 <= lane1 <= 1
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 <= lane1 <= 3
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 <= lane1 <= 0
+b → Vn.1D
+0 <= lane2 <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 <= lane1 <= 1
+b → Vn.1D
+0 <= lane2 <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 <= lane1 <= 0
+b → Vn.1D
+0 <= lane2 <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 <= lane1 <= 1
+b → Vn.1D
+0 <= lane2 <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.2S
+0 << lane2 << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.2S
+0 << lane2 << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+
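
The "Argument Preparation" lines in the entry above restrict the lane immediates: lane1 ranges over the four S lanes of the `Vd.4S` destination and lane2 over the two S lanes of the `Vn.2S` source. The sketch below shows one hedged way such bounds could be expressed in Rust, passing the lanes as const generics and asserting the documented ranges; the helper name and signature are hypothetical, not something this patch defines.

```rust
// Hypothetical helper mirroring the entry above: lane1 selects one of the
// four S lanes of the destination, lane2 one of the two S lanes of the
// source. The bounds from "Argument Preparation" become assertions here.
fn ins_s_lane<const LANE1: usize, const LANE2: usize>(
    a: [u32; 4], // Vd.4S
    b: [u32; 2], // Vn.2S
) -> [u32; 4] {
    assert!(LANE1 < 4 && LANE2 < 2, "lane immediate out of range");
    let mut result = a;
    result[LANE1] = b[LANE2];
    result
}

fn main() {
    assert_eq!(ins_s_lane::<3, 1>([0; 4], [7, 9]), [0, 0, 0, 9]);
}
```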

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.1D
+0 << lane2 << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.1D
+0 << lane2 << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.8B
+0 << lane2 << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.8B
+0 << lane2 << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.4H
+0 << lane2 << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.4H
+0 << lane2 << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+
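
For the scalar `Dd` destination in the entry above there is only a single 64-bit element, so the duplication loop collapses to copying the selected lane of `Vn.2D` into the result. A minimal plain-Rust sketch of that reading of the entry follows; the two-lane model and the function name are illustrative, not part of the patch.

```rust
// Model of DUP Dd, Vn.D[lane2] with esize = 64: the scalar destination
// holds exactly one element, so duplication reduces to selecting lane2.
fn dup_d_from_lane(v_n: [u64; 2], lane2: usize) -> u64 {
    assert!(lane2 < 2, "lane index out of range");
    v_n[lane2]
}

fn main() {
    assert_eq!(dup_d_from_lane([10, 20], 1), 20);
}
```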

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+
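
The nested loop above reverses the bit order inside each 8-bit element independently; it does not reverse the byte order of the vector. A small plain-Rust model of that inner computation is given below, cross-checked against `u8::reverse_bits` from the core library; the function name is illustrative only.

```rust
// Plain-Rust model of the RBIT Vd.8B, Vn.8B pseudocode: for each 8-bit
// element, bit i of the source becomes bit (esize-1-i) of the result.
fn rbit_8b(operand: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for e in 0..8 {
        let element = operand[e];
        let mut rev = 0u8;
        for i in 0..8 {
            rev |= ((element >> i) & 1) << (7 - i); // rev<7-i> = element<i>
        }
        // The core library computes the same thing:
        debug_assert_eq!(rev, element.reverse_bits());
        result[e] = rev;
    }
    result
}

fn main() {
    assert_eq!(rbit_8b([0b0000_0001; 8])[0], 0b1000_0000);
}
```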

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+
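
In the `INS Vd.D[0],Xn` form above, the 64-bit general-purpose value becomes the low half of the vector, and the `Vd.8B` result is simply that half viewed as eight byte lanes. A minimal plain-Rust sketch of that reading, assuming little-endian lane numbering for the model; the function name is illustrative.

```rust
// Model of INS Vd.D[0], Xn: the 64-bit general-purpose value fills the
// low half of the vector, then the Vd.8B result views it as 8 x u8.
fn ins_d0_from_gpr(x_n: u64) -> [u8; 8] {
    x_n.to_le_bytes() // Elem[result, 0, 64] = element; reinterpreted as bytes
}

fn main() {
    assert_eq!(ins_d0_from_gpr(0x0102_0304_0506_0708)[0], 0x08);
}
```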

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+
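
The duplication loop above is what a broadcast intrinsic boils down to: one scalar value copied into every lane. As a rough cross-check, the snippet below uses what are assumed to be the matching `core::arch::aarch64` intrinsics, `vdup_n_u8` to broadcast and `vget_lane_u8` to read a lane back; treat the exact names and signatures as an assumption rather than something this patch defines, and note that the demonstration only runs on AArch64.

```rust
// AArch64-only demonstration; on other targets it compiles to a no-op.
// vdup_n_u8 / vget_lane_u8 are assumed to be the matching NEON intrinsics.
#[cfg(target_arch = "aarch64")]
fn demo() {
    use core::arch::aarch64::{vdup_n_u8, vget_lane_u8};
    // Safety: these intrinsics require the `neon` target feature, which is
    // part of the default AArch64 feature set.
    unsafe {
        let v = vdup_n_u8(0x2a);                // DUP Vd.8B, Wn
        assert_eq!(vget_lane_u8::<3>(v), 0x2a); // every lane holds the value
    }
}

#[cfg(not(target_arch = "aarch64"))]
fn demo() {}

fn main() {
    demo();
}
```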

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,rn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,rn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.8H,rn

Argument Preparation

value → rn

Results

Vd.8H → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.1D,rn

Argument Preparation

value → rn

Results

Vd.1D → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.2D,rn

Argument Preparation

value → rn

Results

Vd.2D → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.8B,Vn.B[lane]

Argument Preparation

vec → Vn.8B
0 <= lane <= 7

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
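
The lane form above selects one source element and broadcasts it. A plain-Rust model of that lane-broadcast behaviour for the 8-byte case (a model of the operation, not the intrinsic itself):

fn dup_lane_u8x8(vec: [u8; 8], lane: usize) -> [u8; 8] {
    assert!(lane <= 7); // matches the 0 <= lane <= 7 constraint above
    // Elem[result, e, esize] = element for every e: broadcast vec[lane].
    [vec[lane]; 8]
}

For example, dup_lane_u8x8([1, 2, 3, 4, 5, 6, 7, 8], 2) returns [3; 8].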

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.16B,Vn.B[lane]

Argument Preparation

vec → Vn.8B
0 <= lane <= 7

Results

Vd.16B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.8B,Vn.B[lane]

Argument Preparation

vec → Vn.16B
0 <= lane <= 15

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

A64 Instruction

DUP Vd.1D,Vn.D[0]
INS Vd.D[1],Vm.D[0]

Argument Preparation

low → Vn.8B
high → Vm.8B

Results

Vd.16B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(128) result;

result = V[d];
Elem[result, index, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
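
For illustration, a minimal Rust sketch assuming this DUP-then-INS pair corresponds to a combine-two-halves intrinsic such as core::arch::aarch64::vcombine_u8 (an assumed mapping, not stated by this entry):

#[cfg(target_arch = "aarch64")]
unsafe fn combine_halves(low: [u8; 8], high: [u8; 8]) -> [u8; 16] {
    use core::arch::aarch64::{uint8x8_t, vcombine_u8};
    let lo: uint8x8_t = core::mem::transmute(low);
    let hi: uint8x8_t = core::mem::transmute(high);
    // DUP Vd.1D, Vn.D[0] seeds the low 64 bits of the destination;
    // INS Vd.D[1], Vm.D[0] then inserts the other operand into the high 64 bits.
    core::mem::transmute(vcombine_u8(lo, hi))
}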

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.2S 
+high → Vm.2S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.8B 
+high → Vm.8B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.2S 
+high → Vm.2S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.2S 
+high → Vm.2S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.8B 
+high → Vm.8B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.1D,Vn.D[1]

Argument Preparation

a → Vn.16B

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
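
For illustration, a sketch assuming this D[1] form corresponds to a take-the-upper-half intrinsic such as core::arch::aarch64::vget_high_u8 (an assumed mapping):

#[cfg(target_arch = "aarch64")]
unsafe fn upper_half(a: [u8; 16]) -> [u8; 8] {
    use core::arch::aarch64::{uint8x16_t, vget_high_u8};
    let v: uint8x16_t = core::mem::transmute(a);
    // DUP Vd.1D, Vn.D[1]: copy the upper 64 bits of the source vector.
    core::mem::transmute(vget_high_u8(v))
}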

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.1D,Vn.D[0]

Argument Preparation

a → Vn.16B

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
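
The D[0] form is the mirror image of the upper-half sketch earlier: it keeps the lower 64 bits, as a take-the-lower-half intrinsic such as core::arch::aarch64::vget_low_u8 would (again an assumed mapping):

#[cfg(target_arch = "aarch64")]
unsafe fn lower_half(a: [u8; 16]) -> [u8; 8] {
    use core::arch::aarch64::{uint8x16_t, vget_low_u8};
    let v: uint8x16_t = core::mem::transmute(a);
    // DUP Vd.1D, Vn.D[0]: copy the lower 64 bits of the source vector.
    core::mem::transmute(vget_low_u8(v))
}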

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Bd,Vn.B[lane]

Argument Preparation

vec → Vn.8B
0 <= lane <= 7

Results

Bd → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64
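
For the scalar destinations (Bd, Hd, Sd, Dd), the operation reduces to picking one lane. A plain-Rust model for the byte case (a model only, not a specific intrinsic):

fn dup_b_lane(vec: [u8; 8], lane: usize) -> u8 {
    assert!(lane <= 7); // matches the 0 <= lane <= 7 constraint above
    // The selected byte becomes the scalar result; on hardware, writing a
    // scalar SIMD&FP register clears the remaining bits of the destination.
    vec[lane]
}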

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 <= lane <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 <= lane <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 <= lane <= 15

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+
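The 128-bit source variants only change the lane bound: a .16B source allows 0 <= lane <= 15, a .8H source 0 <= lane <= 7, and so on. Assuming the q-suffixed lane intrinsics exist as in current stdarch (again an assumption, not part of this diff), the same sketch for the wider source is:

#[cfg(target_arch = "aarch64")]
fn last_byte(v: core::arch::aarch64::uint8x16_t) -> u8 {
    // Sketch only: intrinsic name and const-generic lane form assumed.
    use core::arch::aarch64::vgetq_lane_u8;
    // For a .16B source the valid range is 0 <= LANE <= 15.
    unsafe { vgetq_lane_u8::<15>(v) }
}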

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 <= lane <= 15

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 <= lane <= 15

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
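Ignoring the writeback and replicate branches, the common path of the LD1 pseudocode above is a plain contiguous load from the address in Xn. A minimal Rust sketch of how that surfaces, assuming the vld1_u8/vld1q_u8 intrinsics from core::arch::aarch64 (the "ptr → Xn" preparation becomes a raw pointer argument; not part of this patch):

#[cfg(target_arch = "aarch64")]
fn load_vectors(bytes: &[u8; 24]) -> (core::arch::aarch64::uint8x16_t, core::arch::aarch64::uint8x8_t) {
    // Sketch only: intrinsic names assumed from core::arch::aarch64.
    use core::arch::aarch64::{vld1_u8, vld1q_u8};
    unsafe {
        // The pointers must be valid for 16 and 8 readable bytes respectively.
        let q = vld1q_u8(bytes.as_ptr());
        let d = vld1_u8(bytes.as_ptr().add(16));
        (q, d)
    }
}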

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
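
A minimal sketch of this A64-only 1D form, assuming it corresponds to a 64-bit element load such as vld1_f64 (the element type is inferred from the A64-only restriction, not stated above):

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float64x1_t, vld1_f64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_one_f64(ptr: *const f64) -> float64x1_t {
    // LD1 {Vt.1D},[Xn]: loads a single 64-bit element into the low half of a V register.
    // The f64 element type here is an assumption.
    vld1_f64(ptr)
}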

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
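
The 128-bit counterpart can be sketched the same way; vld1q_f64 is again an assumed mapping for this A64-only 2D form:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float64x2_t, vld1q_f64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_two_f64(ptr: *const f64) -> float64x2_t {
    // LD1 {Vt.2D},[Xn]: loads two contiguous 64-bit elements into one 128-bit vector.
    // The f64 element type here is an assumption.
    vld1q_f64(ptr)
}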

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8B
+0 <= lane <= 7

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
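
To make the lane-insert behaviour concrete, here is a hedged Rust sketch using vld1_lane_u8 from core::arch::aarch64. The intrinsic name and the const-generic lane parameter follow the current stdarch API, and lane 5 is an arbitrary choice within 0..=7; none of this is stated in the entry above.

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint8x8_t, vld1_lane_u8};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_byte_into_lane5(ptr: *const u8, src: uint8x8_t) -> uint8x8_t {
    // Loads one byte from `ptr` into lane 5 of `src`; the other seven lanes
    // are returned unchanged. The mapping to vld1_lane_u8 is assumed.
    vld1_lane_u8::<5>(ptr, src)
}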

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.16B
+0 <= lane <= 15

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
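
For the 128-bit 8H form, a sketch under the same assumptions (vld1q_lane_u16 as the mapped intrinsic, lane 3 chosen arbitrarily from 0..=7):

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint16x8_t, vld1q_lane_u16};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_u16_into_lane3(ptr: *const u16, src: uint16x8_t) -> uint16x8_t {
    // Loads one 16-bit element into lane 3 of `src`, leaving lanes 0..=2 and 4..=7 untouched.
    // The mapping to vld1q_lane_u16 is assumed.
    vld1q_lane_u16::<3>(ptr, src)
}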

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2S
+0 <= lane <= 1

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4S
+0 <= lane <= 3

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
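
The 32-bit lane variant follows the same pattern; vld1q_lane_u32 is one plausible (assumed) mapping for this 4S form, with lane 2 chosen arbitrarily:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint32x4_t, vld1q_lane_u32};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_u32_into_lane2(ptr: *const u32, src: uint32x4_t) -> uint32x4_t {
    // Loads one 32-bit element into lane 2 of `src`; lanes 0, 1 and 3 pass through unchanged.
    // The mapping to vld1q_lane_u32 is assumed.
    vld1q_lane_u32::<2>(ptr, src)
}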

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8B
+0 <= lane <= 7

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.16B
+0 <= lane <= 15

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2S
+0 <= lane <= 1

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4S
+0 <= lane <= 3

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64
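
A sketch of the 64-bit lane insert, assuming a mapping to vld1q_lane_u64 (for a 2D vector, lane 1 is the only non-zero choice):

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint64x2_t, vld1q_lane_u64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_u64_into_lane1(ptr: *const u64, src: uint64x2_t) -> uint64x2_t {
    // Loads one 64-bit element into the high lane of `src`; the low lane is preserved.
    // The mapping to vld1q_lane_u64 is assumed.
    vld1q_lane_u64::<1>(ptr, src)
}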

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2S
+0 <= lane <= 1

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4S
+0 <= lane <= 3

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8B
+0 <= lane <= 7

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.16B
+0 <= lane <= 15

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
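
For this A64-only 2D lane form the element type is presumably f64; under that assumption, vld1q_lane_f64 gives the same lane-insert behaviour:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float64x2_t, vld1q_lane_f64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_f64_into_lane0(ptr: *const f64, src: float64x2_t) -> float64x2_t {
    // Loads one f64 into lane 0 of `src`; lane 1 is preserved.
    // Both the f64 element type and the mapping to vld1q_lane_f64 are assumptions.
    vld1q_lane_f64::<0>(ptr, src)
}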

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
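
The replicate-to-all-lanes behaviour can be sketched with vld1_dup_u8, an assumed mapping for this LD1R 8B form:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint8x8_t, vld1_dup_u8};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn splat_byte(ptr: *const u8) -> uint8x8_t {
    // LD1R {Vt.8B},[Xn]: reads a single byte and replicates it into all eight lanes.
    // The mapping to vld1_dup_u8 is assumed.
    vld1_dup_u8(ptr)
}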

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
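
For the LD1 {Vt.1D},[Xn] form above, replicate = FALSE, so the second branch of the pseudocode runs: the register is read into rval, the loaded element is inserted into lane index, and the remaining bytes of rval are written back unchanged, which is exactly the "without affecting the other bits" behaviour in the description. A small plain-Rust sketch of that MemOp_LOAD path follows; the helper name and the [u8; 16] register model are illustrative assumptions, not core_arch API.

// Model of the MemOp_LOAD, non-replicate branch: insert one element into
// lane `index` of the register, leaving the other bytes untouched.
fn ld1_lane_model(mem: &[u8], addr: usize, mut rval: [u8; 16], index: usize, ebytes: usize) -> [u8; 16] {
    // Elem[rval, index, esize] = Mem[address + offs, ebytes, AccType_VEC]
    rval[index * ebytes..(index + 1) * ebytes].copy_from_slice(&mem[addr..addr + ebytes]);
    rval // V[t] = rval  (only the selected lane changed)
}

For the .1D arrangement, index = 0 and ebytes = 8, so the single 64-bit lane is replaced by the loaded doubleword.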

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
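
The ST1 entries in this part of the file reuse the same Operation pseudocode but take the MemOp_STORE branch: the selected element is extracted from the register and written to memory, and the register itself is not modified. A plain-Rust sketch of that branch follows; the helper name and the [u8; 16] register model are illustrative assumptions, not core_arch API.

// Model of the MemOp_STORE branch: copy lane `index` of the source
// register out to memory at `addr`.
fn st1_lane_model(mem: &mut [u8], addr: usize, rval: [u8; 16], index: usize, ebytes: usize) {
    // Mem[address + offs, ebytes, AccType_VEC] = Elem[rval, index, esize]
    mem[addr..addr + ebytes].copy_from_slice(&rval[index * ebytes..(index + 1) * ebytes]);
}

As with the loads, the [Xn] forms listed here have wback = FALSE, so the write-back tail of the pseudocode does not run.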

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
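
As a concrete companion to the full-vector form documented above (A64 ST1 {Vt.16B},[Xn]), the following is a minimal Rust sketch using the core::arch::aarch64 NEON intrinsics. The intrinsic name belonging to this particular entry is not shown in this excerpt, so vst1q_u8 and vdupq_n_u8 are an assumed mapping for a 16-byte vector store.

// Minimal sketch (assumed mapping): store all 16 bytes of a Q register,
// corresponding to ST1 {Vt.16B},[Xn] or an equivalent full-register store.
#[cfg(target_arch = "aarch64")]
fn store_16_bytes(dst: &mut [u8; 16]) {
    use core::arch::aarch64::{vdupq_n_u8, vst1q_u8};
    // NEON is mandatory on AArch64, so these intrinsics are callable here.
    unsafe {
        let v = vdupq_n_u8(0xAB);      // Vt.16B with every byte set to 0xAB
        vst1q_u8(dst.as_mut_ptr(), v); // ptr -> Xn, val -> Vt.16B
    }
}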

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
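
For the byte-lane form just documented (ST1 {Vt.b}[lane],[Xn] with val → Vt.8B and lane in 0..=7), a minimal Rust sketch follows; vst1_lane_u8 is assumed to be the corresponding core::arch::aarch64 intrinsic, with the lane index passed as a const generic.

// Minimal sketch (assumed mapping): store lane 3 of an 8-byte D register,
// i.e. ST1 {Vt.b}[3],[Xn].
#[cfg(target_arch = "aarch64")]
fn store_byte_lane(dst: &mut u8) {
    use core::arch::aarch64::{vdup_n_u8, vst1_lane_u8};
    unsafe {
        let v = vdup_n_u8(7);                 // Vt.8B
        vst1_lane_u8::<3>(dst as *mut u8, v); // const LANE must satisfy 0 <= LANE <= 7
    }
}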

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
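
The single-precision lane form above (ST1 {Vt.s}[lane],[Xn] with val → Vt.4S, lane in 0..=3) can be exercised in the same way; vst1q_lane_f32 is assumed here to be the matching intrinsic.

// Minimal sketch (assumed mapping): store lane 2 of a float32x4_t,
// i.e. ST1 {Vt.s}[2],[Xn].
#[cfg(target_arch = "aarch64")]
fn store_f32_lane(dst: &mut f32) {
    use core::arch::aarch64::{vdupq_n_f32, vst1q_lane_f32};
    unsafe {
        let v = vdupq_n_f32(1.5);                // Vt.4S
        vst1q_lane_f32::<2>(dst as *mut f32, v); // const LANE must satisfy 0 <= LANE <= 3
    }
}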

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
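
And for the 64-bit lane form above (ST1 {Vt.d}[lane],[Xn] with val → Vt.2D, lane 0 or 1), a sketch under the assumption that vst1q_lane_u64 is the corresponding intrinsic:

// Minimal sketch (assumed mapping): store the upper 64-bit lane of a Q register,
// i.e. ST1 {Vt.d}[1],[Xn].
#[cfg(target_arch = "aarch64")]
fn store_upper_u64_lane(dst: &mut u64) {
    use core::arch::aarch64::{vdupq_n_u64, vst1q_lane_u64};
    unsafe {
        let v = vdupq_n_u64(0xDEAD_BEEF);        // Vt.2D
        vst1q_lane_u64::<1>(dst as *mut u64, v); // const LANE must be 0 or 1
    }
}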

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
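
Same store on the 128-bit register form (Vt.4S, lane 0 to 3); a short sketch using vst1q_lane_f32 (the s32/u32 q-forms are analogous), again with illustrative data:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const float src[4] = {10.0f, 20.0f, 30.0f, 40.0f};   // illustrative data
    float out = 0.0f;
    float32x4_t v = vld1q_f32(src);   // fill all four lanes
    vst1q_lane_f32(&out, v, 3);       // ST1 {Vt.s}[3],[Xn]
    printf("%f\n", out);              // prints 40.000000
    return 0;
}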

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
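
For the Vt.1D form the register holds a single 64-bit element, so the only legal lane index is 0. An illustrative sketch with vst1_lane_s64 (the u64 form is analogous):

#include <arm_neon.h>
#include <stdio.h>
#include <inttypes.h>

int main(void) {
    int64_t out = 0;
    int64x1_t v = vdup_n_s64(-42);   // one 64-bit lane, illustrative value
    vst1_lane_s64(&out, v, 0);       // .1D form: lane must be 0
    printf("%" PRId64 "\n", out);    // prints -42
    return 0;
}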

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
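
The 128-bit Vt.2D form allows lane 0 or 1; a sketch with vst1q_lane_u64 (s64 is analogous):

#include <arm_neon.h>
#include <stdio.h>
#include <inttypes.h>

int main(void) {
    uint64_t out = 0;
    uint64x2_t v = vdupq_n_u64(7);   // both lanes hold 7 (illustrative)
    vst1q_lane_u64(&out, v, 1);      // store only the upper 64-bit lane
    printf("%" PRIu64 "\n", out);    // prints 7
    return 0;
}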

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
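
The 16-bit-element forms behave the same way, only ebytes is 2. A sketch with vst1_lane_s16 (the u16 form is analogous); data is illustrative:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const int16_t src[4] = {1, 2, 3, 4};
    int16_t out = 0;
    int16x4_t v = vld1_s16(src);
    vst1_lane_s16(&out, v, 2);    // writes 2 bytes: lane 2, value 3
    printf("%d\n", out);          // prints 3
    return 0;
}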

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
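
And the 128-bit 8H form, sketched with vst1q_lane_u16 and illustrative data:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const uint16_t src[8] = {0, 1, 2, 3, 4, 5, 6, 7};   // illustrative data
    uint16_t out = 0;
    uint16x8_t v = vld1q_u16(src);
    vst1q_lane_u16(&out, v, 7);       // store the highest-numbered lane
    printf("%u\n", (unsigned)out);    // prints 7
    return 0;
}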

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
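
For byte elements (Vt.8B, ebytes = 1) the pattern is identical; a sketch with vst1_lane_u8 (s8 is analogous):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const uint8_t src[8] = {10, 11, 12, 13, 14, 15, 16, 17};   // illustrative data
    uint8_t out = 0;
    uint8x8_t v = vld1_u8(src);
    vst1_lane_u8(&out, v, 5);         // single-byte store of lane 5
    printf("%u\n", (unsigned)out);    // prints 15
    return 0;
}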

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
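
The 16B form extends the lane range to 0..15; sketched with vst1q_lane_s8:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int8_t src[16];
    for (int i = 0; i < 16; i++) src[i] = (int8_t)(i * 2);   // illustrative data
    int8_t out = 0;
    int8x16_t v = vld1q_s8(src);
    vst1q_lane_s8(&out, v, 15);   // last lane of the 128-bit register
    printf("%d\n", out);          // prints 30
    return 0;
}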

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
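
This variant is listed as A64-only because float64 vectors are not part of the A32 intrinsics; a sketch with vst1_lane_f64, assuming an AArch64 toolchain:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    double out = 0.0;
    float64x1_t v = vdup_n_f64(3.5);   // single double-precision lane (illustrative value)
    vst1_lane_f64(&out, v, 0);         // .1D form: lane must be 0
    printf("%f\n", out);               // prints 3.500000
    return 0;
}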

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
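
These LD2 entries map to the vld2 family: LD2 de-interleaves memory into two registers, so the first element of each 2-element structure lands in val[0] and the second in val[1]. A minimal C sketch with vld2_s8 (the u8 form is analogous); the buffer contents are illustrative:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    // Eight interleaved pairs: {0,100}, {1,101}, ..., {7,107}.
    int8_t src[16];
    for (int i = 0; i < 8; i++) {
        src[2 * i] = (int8_t)i;
        src[2 * i + 1] = (int8_t)(100 + i);
    }
    int8x8x2_t r = vld2_s8(src);   // LD2 {Vt.8B - Vt2.8B},[Xn]
    // Pair 3 was {3, 103}.
    printf("%d %d\n", vget_lane_s8(r.val[0], 3), vget_lane_s8(r.val[1], 3));
    return 0;
}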

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
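
The 128-bit 8H form loads eight 2-element structures per instruction; sketched with vld2q_s16 (illustrative data):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16_t src[16];
    for (int i = 0; i < 8; i++) {
        src[2 * i] = (int16_t)i;               // first element of each pair
        src[2 * i + 1] = (int16_t)(1000 + i);  // second element of each pair
    }
    int16x8x2_t r = vld2q_s16(src);   // LD2 {Vt.8H - Vt2.8H},[Xn]
    printf("%d %d\n", vgetq_lane_s16(r.val[0], 7), vgetq_lane_s16(r.val[1], 0));   // prints 7 1000
    return 0;
}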

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
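
A common use of the 4S form is splitting interleaved float pairs, for example complex numbers stored as re,im; a sketch with vld2q_f32 (illustrative data):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    // Four (re, im) pairs stored interleaved.
    const float src[8] = {1.0f, -1.0f, 2.0f, -2.0f, 3.0f, -3.0f, 4.0f, -4.0f};
    float32x4x2_t c = vld2q_f32(src);   // c.val[0] = real parts, c.val[1] = imaginary parts
    float re[4], im[4];
    vst1q_f32(re, c.val[0]);
    vst1q_f32(im, c.val[1]);
    printf("%f %f\n", re[2], im[2]);    // prints 3.000000 -3.000000
    return 0;
}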

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
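
The 16B form is convenient for splitting an interleaved two-channel byte stream; a sketch with vld2q_u8 (illustrative data):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8_t src[32];
    for (int i = 0; i < 16; i++) {
        src[2 * i] = (uint8_t)i;             // channel 0
        src[2 * i + 1] = (uint8_t)(200 - i); // channel 1
    }
    uint8x16x2_t r = vld2q_u8(src);   // LD2 {Vt.16B - Vt2.16B},[Xn]
    uint8_t ch0[16], ch1[16];
    vst1q_u8(ch0, r.val[0]);
    vst1q_u8(ch1, r.val[1]);
    printf("%u %u\n", (unsigned)ch0[15], (unsigned)ch1[15]);   // prints 15 185
    return 0;
}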

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
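
For the LD1 {Vt.1D - Vt2.1D} pair-load entries, the two destination registers come back to C as the .val[0]/.val[1] fields of the returned structure, exactly as listed under Results. A minimal usage sketch, assuming the AArch64 ACLE intrinsic vld1_u64_x2 from <arm_neon.h> (the intrinsic choice and the sample arithmetic are assumptions for illustration, not taken from the entry itself):

#include <arm_neon.h>
#include <stdint.h>

uint64_t lo_plus_hi(const uint64_t buf[2])
{
    /* LD1 {Vt.1D - Vt2.1D},[Xn]: buf -> Xn, two 1D registers loaded */
    uint64x1x2_t r = vld1_u64_x2(buf);   /* r.val[0] <- Vt, r.val[1] <- Vt2 */
    return vget_lane_u64(r.val[0], 0) + vget_lane_u64(r.val[1], 0);
}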

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
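
The "if wback then ..." tail shared by these Operation blocks is the post-index form: after the structure has been transferred, the base register (Xn or SP) is advanced either by the total number of bytes transferred or, when a register offset is supplied, by Xm. A small C sketch of just that address-update rule; the variable names mirror the pseudocode and nothing here is a real API.

#include <stdint.h>
#include <stdbool.h>

/* Post-index base update from the pseudocode's writeback tail.
 * xm_value stands in for X[m]; bytes_transferred is the offs
 * accumulated by the load/store loop. Illustration only. */
static uint64_t wback_address(uint64_t address, bool use_reg_offset,
                              uint64_t xm_value, uint64_t bytes_transferred)
{
    uint64_t offs = bytes_transferred;
    if (use_reg_offset)        /* if m != 31 then offs = X[m] */
        offs = xm_value;
    return address + offs;     /* written back to SP or X[n]  */
}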

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
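
For the LD3 single-structure entries that follow, the C-level intrinsics take the previous register contents as an extra argument, because only one lane of each register is overwritten and the other lanes must be preserved, as the Description says. A hedged sketch, assuming the ACLE intrinsic vld3_lane_u8 from <arm_neon.h>; the intrinsic choice and the lane number are assumptions for illustration.

#include <arm_neon.h>
#include <stdint.h>

uint8x8x3_t load_third_lane(const uint8_t rgb[3], uint8x8x3_t prev)
{
    /* LD3 {Vt.8B - Vt3.8B}[2],[Xn]: one 3-element structure is loaded
     * into lane 2 of each of the three registers; the other lanes of
     * prev.val[0..2] are left untouched. */
    return vld3_lane_u8(rgb, prev, 2);
}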

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
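
The "if replicate then" branch of the same pseudocode covers the load-and-replicate (LDnR) forms: each element read from memory is broadcast across every lane of its destination register. A scalar C model of that branch, with invented names as in the earlier load-one-lane sketch.

#include <stdint.h>
#include <string.h>

typedef struct { uint8_t b[16]; } Vreg;   /* stand-in for a V register */

/* Model of the replicate branch:
 * V[t] = Replicate(element, datasize DIV esize). Illustration only. */
static void ld_replicate_model(Vreg regs[32], const uint8_t *mem,
                               int t, int esize, int selem, int datasize)
{
    const int ebytes = esize / 8;
    const int lanes  = datasize / esize;
    size_t offs = 0;
    for (int s = 0; s < selem; s++) {
        for (int lane = 0; lane < lanes; lane++)   /* replicate element */
            memcpy(&regs[t].b[lane * ebytes], mem + offs, ebytes);
        offs += ebytes;
        t = (t + 1) % 32;
    }
}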

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
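
The LD1 {Vt.1D - Vt3.1D} entries are the three-register variant of the pair load shown earlier; the Results mapping again places Vt..Vt3 into .val[0]..val[2]. A usage sketch, assuming the ACLE intrinsic vld1_u64_x3 from <arm_neon.h> (an assumption, mirroring the earlier vld1_u64_x2 example):

#include <arm_neon.h>
#include <stdint.h>

uint64_t sum3(const uint64_t buf[3])
{
    /* LD1 {Vt.1D - Vt3.1D},[Xn]: buf -> Xn, three 1D registers loaded */
    uint64x1x3_t r = vld1_u64_x3(buf);
    return vget_lane_u64(r.val[0], 0)
         + vget_lane_u64(r.val[1], 0)
         + vget_lane_u64(r.val[2], 0);
}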

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
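For the .2D form of LD3 above, a hedged usage sketch under the assumption that it maps to vld3q_u64 in core::arch::aarch64 (name and availability assumed):

// Assumed mapping (not stated in this patch): LD3 {Vt.2D - Vt3.2D},[Xn]
// surfaced as core::arch::aarch64::vld3q_u64.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_u64_ld3(data: &[u64; 6]) -> core::arch::aarch64::uint64x2x3_t {
    // .0, .1, .2 correspond to result.val[0], result.val[1], result.val[2]
    core::arch::aarch64::vld3q_u64(data.as_ptr())
}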

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
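A usage sketch for the .8B form of LD4 above, assuming the corresponding intrinsic is vld4_u8 in core::arch::aarch64 (an assumed mapping, not stated in this patch):

// Assumed mapping: LD4 {Vt.8B - Vt4.8B},[Xn] surfaced as vld4_u8.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_u8_ld4(data: &[u8; 32]) -> core::arch::aarch64::uint8x8x4_t {
    // 32 contiguous bytes hold eight 4-element structures;
    // .0..=.3 correspond to result.val[0]..result.val[3]
    core::arch::aarch64::vld4_u8(data.as_ptr())
}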

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
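A slightly fuller sketch for the .4S form above, combining run-time feature detection with the assumed vld4q_f32 intrinsic; both the intrinsic name and the std::arch::is_aarch64_feature_detected! path are assumptions about the final crate layout:

// Assumed mapping: LD4 {Vt.4S - Vt4.4S},[Xn] surfaced as vld4q_f32.
#[cfg(target_arch = "aarch64")]
fn deinterleave_f32(data: &[f32; 16]) -> Option<core::arch::aarch64::float32x4x4_t> {
    if std::arch::is_aarch64_feature_detected!("neon") {
        // 16 contiguous f32s hold four 4-element structures; the pointer is valid
        // for the full load, so the intrinsic call is sound after the feature check.
        Some(unsafe { core::arch::aarch64::vld4q_f32(data.as_ptr()) })
    } else {
        None
    }
}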

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
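A usage sketch for the four-register LD1 entry above, assuming it surfaces as vld1_f64_x4 (assumed name and availability):

// Assumed mapping: LD1 {Vt.1D - Vt4.1D},[Xn] surfaced as vld1_f64_x4.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_f64_x4(data: &[f64; 4]) -> core::arch::aarch64::float64x1x4_t {
    // .0..=.3 correspond to result.val[0]..result.val[3] in the Results list above
    core::arch::aarch64::vld1_f64_x4(data.as_ptr())
}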

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
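A usage sketch for the .2D form of LD4 above, assuming a vld4q_u64 intrinsic (assumed mapping; the .2D forms are A64-only):

// Assumed mapping: LD4 {Vt.2D - Vt4.2D},[Xn] surfaced as vld4q_u64.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_u64_ld4(data: &[u64; 8]) -> core::arch::aarch64::uint64x2x4_t {
    // eight contiguous u64s hold two 4-element structures;
    // .0..=.3 correspond to result.val[0]..result.val[3]
    core::arch::aarch64::vld4q_u64(data.as_ptr())
}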

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load multiple single-element structures to four registers. This instruction loads multiple single-element structures from memory and writes the result to the four SIMD&FP registers.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
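
Note that in all of these Operation blocks the final wback section implements the optional post-indexed addressing forms: after the transfer, the base register (Xn or SP) is advanced by the total number of bytes accessed, unless a register offset Xm is supplied (m != 31), in which case the value of Xm is used instead. A minimal Rust sketch of that update, collapsing the SP/Xn distinction into a single returned value; the helper name is invented for illustration.

// Hypothetical model of the `if wback then ...` block above.
fn writeback_model(base: u64, bytes_transferred: u64, xm: Option<u64>) -> u64 {
    // `offs` holds the bytes consumed by the transfer loop unless an
    // index register overrides it (m != 31 in the pseudocode)
    let offs = xm.unwrap_or(bytes_transferred);
    base.wrapping_add(offs)
}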

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
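
For the replicating LD2R forms only the replicate branch of the pseudocode runs: each of the two structure elements is broadcast to every lane of its destination register. Below is a minimal Rust model for the .8B arrangement shown here (esize = 8, ebytes = 1, selem = 2, datasize = 64); the helper name and the [u8; 8] register representation are invented for illustration and are not core_arch APIs.

// Hypothetical model of the replicate branch for LD2R with the .8B arrangement.
fn ld2r_8b_model(mem: &[u8], v: &mut [[u8; 8]; 32], t0: usize) {
    let mut t = t0;
    for s in 0..2 {
        // offs == s because ebytes == 1; the element is replicated
        // to fill all eight lanes of the 64-bit register V[t]
        v[t] = [mem[s]; 8];
        t = (t + 1) % 32;
    }
}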

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
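
Across these LD2R variants only esize (and hence ebytes) and the lane count change; the amount of memory read is always selem * ebytes. For the .4S form above that is 2 * (32 DIV 8) = 8 bytes, which is also what a post-indexed form would add to the base register. A tiny illustrative helper makes the arithmetic explicit (the names are made up, not core_arch APIs):

// Bytes read from memory by an LDnR-style replicating load:
// selem elements of esize bits each.
const fn ldnr_struct_bytes(esize_bits: u64, selem: u64) -> u64 {
    selem * (esize_bits / 8)
}

// e.g. LD2R {Vt.4S - Vt2.4S}: 2 * (32 / 8) = 8 bytes per structure
const LD2R_4S_BYTES: u64 = ldnr_struct_bytes(32, 2);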

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
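
The Operation pseudocode is identical for every LDnR form; only esize, selem and datasize change between entries. A minimal plain-Rust model of the `if replicate then` branch, specialised to the LD3R {Vt.8B - Vt3.8B} case above (esize = 8, ebytes = 1, datasize = 64, selem = 3, no writeback) and with registers modelled as byte arrays, reads:

/// Rough model of the replicate branch of the pseudocode above for the
/// LD3R {Vt.8B - Vt3.8B} case. Not the real implementation, just a sketch.
fn ld3r_8b_model(mem: &[u8]) -> [[u8; 8]; 3] {
    let selem = 3; // three consecutive structure elements are loaded
    let mut regs = [[0u8; 8]; 3];
    let mut offs = 0usize; // offs = Zeros()
    for s in 0..selem {
        // element = Mem[address+offs, ebytes, AccType_VEC]
        let element = mem[offs];
        // V[t] = Replicate(element, datasize DIV esize): broadcast to all 8 lanes
        regs[s] = [element; 8];
        // offs = offs + ebytes
        offs += 1;
        // t = (t + 1) MOD 32 becomes simply advancing to the next output register
    }
    regs
}

The hardware differs only in that the three destinations are consecutive SIMD&FP registers taken modulo 32 and the loads are vector-class memory accesses.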

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
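
A sketch of the .16B form above, assuming it is reached through an intrinsic named vld3q_dup_u8 (assumed; the name is not shown in this span) and using vst1q_u8 to observe the broadcast:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ld3r_16b_demo(bytes: &[u8; 3]) -> bool {
    use core::arch::aarch64::{vld3q_dup_u8, vst1q_u8};
    // Load the 3-byte structure and broadcast each byte to all 16 lanes of its
    // own register (tuple fields .0, .1, .2 are the doc's val[0], val[1], val[2]).
    let v = vld3q_dup_u8(bytes.as_ptr());
    // Store the second register back to memory to check the broadcast.
    let mut out = [0u8; 16];
    vst1q_u8(out.as_mut_ptr(), v.1);
    out.iter().all(|&b| b == bytes[1])
}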

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
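
The word-sized .4S form is the one typically used to broadcast small float or 32-bit integer structures. A sketch assuming the mapping is vld3q_dup_f32 (assumed name) and that the intrinsic is available in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn splat_rgb(rgb: &[f32; 3]) -> core::arch::aarch64::float32x4x3_t {
    use core::arch::aarch64::vld3q_dup_f32;
    // One LD3R {Vt.4S - Vt3.4S}: r is replicated into all four lanes of the
    // first register, g into the second, b into the third.
    vld3q_dup_f32(rgb.as_ptr())
}

After the call, every lane of .0 holds r, every lane of .1 holds g and every lane of .2 holds b, so the triple can be compared or blended against four packed pixels per iteration without further shuffling.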

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
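
The .1D forms are a degenerate case: each destination is a 64-bit vector with a single lane, so "replicate to all lanes" is a broadcast to one lane and the instruction behaves like three consecutive 64-bit loads, one per output register. A sketch assuming the mapping is vld3_dup_s64 (assumed name):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ld3r_1d(src: *const i64) -> core::arch::aarch64::int64x1x3_t {
    use core::arch::aarch64::vld3_dup_s64;
    // .1D vectors hold a single 64-bit lane, so the replicate is a plain load
    // of three consecutive i64 values into the three single-lane registers.
    vld3_dup_s64(src)
}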

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
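
Note the "Supported architectures: A64" above: the 64-bit-element replicate forms (.2D) exist only in AArch64 state, unlike the narrower forms marked v7/A32/A64. A sketch assuming the mapping is vld3q_dup_u64 (assumed name):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ld3r_2d(src: *const u64) -> core::arch::aarch64::uint64x2x3_t {
    use core::arch::aarch64::vld3q_dup_u64;
    // A64-only form: each of the three 64-bit structure members is replicated
    // into both lanes of its own 128-bit destination register.
    vld3q_dup_u64(src)
}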

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
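
A practical sketch of the 4-element byte form above, for example broadcasting one RGBA pixel so it can be compared against eight packed pixels at a time. The intrinsic name vld4_dup_u8 is assumed (it is not shown in this span):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn splat_rgba(px: &[u8; 4]) -> core::arch::aarch64::uint8x8x4_t {
    use core::arch::aarch64::vld4_dup_u8;
    // One LD4R {Vt.8B - Vt4.8B}: R, G, B and A each end up broadcast across
    // all eight lanes of their own 64-bit register (tuple fields .0 through .3).
    vld4_dup_u8(px.as_ptr())
}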

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
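
A usage-flavoured sketch for the 4S form, assuming the corresponding ACLE intrinsic is vld4q_dup_f32 (the s32/u32 variants are analogous); the function name is illustrative. Broadcasting four packed filter coefficients once keeps each coefficient in its own vector for a later multiply loop.

#include <arm_neon.h>

/* LD4R, 4S form: c.val[i] ends up holding coeff[i] in all 4 lanes,
   ready to be multiplied against vectors of samples. */
float32x4x4_t load_coeffs(const float coeff[4])
{
    float32x4x4_t c = vld4q_dup_f32(coeff);
    return c;
}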

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
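
The 64-bit d-register form for comparison; the ACLE name vld4_dup_s64 is an assumed match for this entry (u64 is analogous) and the wrapper name is illustrative.

#include <arm_neon.h>
#include <stdint.h>

/* LD4R, 1D form: each .1D destination has a single lane, so each of the
   four vectors simply receives one element of the structure. */
int64x1x4_t load4_dup_s64(const int64_t *ptr)
{
    int64x1x4_t r = vld4_dup_s64(ptr);
    /* r.val[i] holds ptr[i]; with one lane per vector there is no
       visible replication. */
    return r;
}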

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
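
A sketch for the A64-only 2D form, assuming it corresponds to an ACLE intrinsic such as vld4q_dup_s64 (the u64/f64/p64 2D variants follow the same pattern); the wrapper name is illustrative.

#include <arm_neon.h>
#include <stdint.h>

/* LD4R, 2D form (A64 only): both 64-bit lanes of each destination
   vector are filled with one element of the 4-element structure. */
int64x2x4_t broadcast4_s64(const int64_t *ptr)
{
    int64x2x4_t r = vld4q_dup_s64(ptr);
    /* r.val[i] = { ptr[i], ptr[i] } */
    return r;
}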

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
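
For the store direction, a minimal sketch assuming this entry maps to an ACLE intrinsic of the vst2_u8 shape, which is the intrinsic the instruction form ST2 {Vt.8B - Vt2.8B},[Xn] is normally generated for; the wrapper name is illustrative.

#include <arm_neon.h>
#include <stdint.h>

/* ST2, 8B form: under the vst2_u8 assumption, the lanes of val.val[0]
   and val.val[1] are written to ptr interleaved:
   val.val[0][0], val.val[1][0], val.val[0][1], val.val[1][1], ... */
void store2_u8(uint8_t *ptr, uint8x8x2_t val)
{
    vst2_u8(ptr, val);
}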

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
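
A slightly larger usage sketch for the 16B form, again assuming the vst2q_u8 and vld1q_u8 ACLE intrinsics and an illustrative function name: interleaving two separate 16-byte planes into one 32-byte buffer, which is the typical use of this store form.

#include <arm_neon.h>
#include <stdint.h>

/* Interleave 16 bytes from a with 16 bytes from b into dst as
   a0, b0, a1, b1, ..., a15, b15 (dst must have room for 32 bytes). */
void interleave16(uint8_t *dst, const uint8_t *a, const uint8_t *b)
{
    uint8x16x2_t v;
    v.val[0] = vld1q_u8(a);
    v.val[1] = vld1q_u8(b);
    vst2q_u8(dst, v);
}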

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
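
For the 64-bit .4H form the matching ACLE shape is a uint16x4x2_t argument. A minimal sketch, assuming the intrinsic involved is vst2_u16 (an assumption, since the entry does not name it):

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 4-lane u16 vectors into 8 consecutive halfwords. */
void store2_u16x4(uint16_t out[8], uint16x4_t a, uint16x4_t b) {
    uint16x4x2_t pair = { { a, b } };   /* val[0] → Vt.4H, val[1] → Vt2.4H */
    vst2_u16(out, pair);
}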

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
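
The 128-bit .8H variant takes a uint16x8x2_t; the sketch below assumes it is exposed as vst2q_u16 (the entry itself does not say which intrinsic it belongs to):

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 8-lane u16 vectors into 16 consecutive halfwords. */
void store2_u16x8(uint16_t out[16], uint16x8_t a, uint16x8_t b) {
    uint16x8x2_t pair;
    pair.val[0] = a;        /* Vt.8H  */
    pair.val[1] = b;        /* Vt2.8H */
    vst2q_u16(out, pair);   /* out[2*i] = a[i], out[2*i+1] = b[i], i = 0..7 */
}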

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
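
For the .2S form, a usage sketch that assumes vst2_u32 (not named in the entry) might look like:

#include <arm_neon.h>
#include <stdint.h>

/* Write the repeating pattern x, y, x, y with the two-register interleaved store. */
void store2_u32x2(uint32_t out[4], uint32_t x, uint32_t y) {
    uint32x2x2_t pair;
    pair.val[0] = vdup_n_u32(x);   /* Vt.2S  = {x, x} */
    pair.val[1] = vdup_n_u32(y);   /* Vt2.2S = {y, y} */
    vst2_u32(out, pair);           /* out = {x, y, x, y} */
}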

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
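
Assuming the .4S entry corresponds to vst2q_u32 (an assumption based purely on the operand shapes), a previously de-interleaved pair of vectors can be written back as interleaved pairs like this:

#include <arm_neon.h>
#include <stdint.h>

/* Re-interleave separate "even" and "odd" streams into out[0..7]. */
void interleave_u32x4(uint32_t out[8], uint32x4_t even, uint32x4_t odd) {
    uint32x4x2_t pair;
    pair.val[0] = even;    /* Vt.4S  */
    pair.val[1] = odd;     /* Vt2.4S */
    vst2q_u32(out, pair);  /* out[2*i] = even[i], out[2*i+1] = odd[i] */
}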

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
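
The 64-bit .8B shape matches a uint8x8x2_t argument; a short sketch, assuming vst2_u8:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 8-byte vectors into a 16-byte buffer. */
void store2_u8x8(uint8_t out[16], uint8x8_t a, uint8x8_t b) {
    uint8x8x2_t pair = { { a, b } };   /* val[0] → Vt.8B, val[1] → Vt2.8B */
    vst2_u8(out, pair);   /* out = a[0], b[0], a[1], b[1], ..., a[7], b[7] */
}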

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
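
With only one 64-bit lane per register there is nothing to interleave, which is presumably why this entry encodes the two-register store as ST1 {Vt.1D - Vt2.1D} rather than ST2. The sketch below assumes the corresponding ACLE intrinsic is vst2_u64; that mapping is inferred from the operand shapes, not stated by the entry.

#include <arm_neon.h>
#include <stdint.h>

/* Store two single-lane u64 vectors to out[0] and out[1]. */
void store2_u64x1(uint64_t out[2], uint64_t x, uint64_t y) {
    uint64x1x2_t pair;
    pair.val[0] = vdup_n_u64(x);   /* Vt.1D  */
    pair.val[1] = vdup_n_u64(y);   /* Vt2.1D */
    vst2_u64(out, pair);           /* out[0] = x, out[1] = y */
}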

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
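
The .2D form is the 128-bit, 64-bit-element variant available on AArch64; assuming it is exposed as vst2q_u64 (the entry does not name the intrinsic), a sketch:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 2-lane u64 vectors into four consecutive doublewords. */
void store2_u64x2(uint64_t out[4], uint64x2_t a, uint64x2_t b) {
    uint64x2x2_t pair;
    pair.val[0] = a;       /* Vt.2D  */
    pair.val[1] = b;       /* Vt2.2D */
    vst2q_u64(out, pair);  /* out = a[0], b[0], a[1], b[1] */
}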

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
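
The three-register entries follow the same pattern with a val[0..2] structure. As an illustration, assuming the .8B entry corresponds to vst3_u8 (an assumption), packing three separate byte planes into interleaved triples looks like:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave three 8-byte planes (e.g. R, G, B) into 24 bytes of R,G,B,R,G,B,... */
void store3_u8x8(uint8_t out[24], uint8x8_t r, uint8x8_t g, uint8x8_t b) {
    uint8x8x3_t rgb;
    rgb.val[0] = r;   /* Vt.8B  */
    rgb.val[1] = g;   /* Vt2.8B */
    rgb.val[2] = b;   /* Vt3.8B */
    vst3_u8(out, rgb);
}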

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
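
Likewise for the 128-bit .4S three-register form; this sketch assumes vst3q_u32, which the entry itself does not name:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave three 4-lane u32 vectors into twelve consecutive words. */
void store3_u32x4(uint32_t out[12], uint32x4_t a, uint32x4_t b, uint32x4_t c) {
    uint32x4x3_t triple;
    triple.val[0] = a;       /* Vt.4S  */
    triple.val[1] = b;       /* Vt2.4S */
    triple.val[2] = c;       /* Vt3.4S */
    vst3q_u32(out, triple);  /* out[3*i] = a[i], out[3*i+1] = b[i], out[3*i+2] = c[i] */
}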

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
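
For readers not used to ARM pseudocode, the MemOp_STORE branch of the Operation block above can be paraphrased in plain Rust. This is an illustrative scalar model only (the function name is made up); it fixes esize to 16 bits and uses the slice length in place of the MOD 32 register wrap:

// Scalar paraphrase of the "one element per register" store path:
// write lane `index` of each of `selem` consecutive registers to memory.
fn store_one_lane(regs: &[[u16; 8]], t0: usize, index: usize, selem: usize) -> Vec<u16> {
    let mut out = Vec::with_capacity(selem);
    let mut t = t0;
    for _s in 0..selem {
        out.push(regs[t][index]); // Mem[address+offs, ebytes] = Elem[V[t], index, esize]
        t = (t + 1) % regs.len(); // t = (t + 1) MOD 32
    }
    out // offs advances by ebytes per stored element
}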

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
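
Illustrative note, not from the ARM text: with 64-bit lanes each d register holds a single element, so the 3-element store above degenerates to a plain ST1 of Vt.1D, Vt2.1D and Vt3.1D with no interleaving to do. A minimal Rust sketch, assuming this entry corresponds to one of the 64-bit vst3 intrinsics (for example vst3_u64; the table does not say which, and the function name below is made up):

#[cfg(target_arch = "aarch64")]
fn demo_vst3_1d(out: &mut [u64; 3]) {
    use core::arch::aarch64::{uint64x1x3_t, vdup_n_u64, vst3_u64};
    // val.val[0] → Vt.1D, val.val[1] → Vt2.1D, val.val[2] → Vt3.1D
    let val = uint64x1x3_t(vdup_n_u64(1), vdup_n_u64(2), vdup_n_u64(3));
    // ptr → Xn; writes the three elements contiguously: 1, 2, 3
    unsafe { vst3_u64(out.as_mut_ptr(), val) };
}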

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
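
Illustrative sketch, not part of the table: the 2D forms here are listed as A64-only, and in Rust they would be reached through the q-register vst3 intrinsics. Assuming this entry corresponds to something like vst3q_u64 (an assumption; the table gives only the instruction form):

#[cfg(target_arch = "aarch64")]
fn demo_vst3q_2d(out: &mut [u64; 6]) {
    use core::arch::aarch64::{uint64x2x3_t, vdupq_n_u64, vst3q_u64};
    // val.val[0] → Vt.2D, val.val[1] → Vt2.2D, val.val[2] → Vt3.2D
    let val = uint64x2x3_t(vdupq_n_u64(1), vdupq_n_u64(2), vdupq_n_u64(3));
    // ptr → Xn; writes six elements, interleaved as 1,2,3,1,2,3
    unsafe { vst3q_u64(out.as_mut_ptr(), val) };
}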

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
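
For comparison with the ST3 sketches earlier, here is a minimal Rust sketch of a 4-register store, assuming the entry above maps to a vst4-style byte intrinsic such as vst4_u8 (the intrinsic name and the demo function are assumptions, not taken from the table):

#[cfg(target_arch = "aarch64")]
fn demo_vst4_8b(out: &mut [u8; 32]) {
    use core::arch::aarch64::{uint8x8x4_t, vdup_n_u8, vst4_u8};
    // val.val[0] → Vt.8B, val.val[1] → Vt2.8B, val.val[2] → Vt3.8B, val.val[3] → Vt4.8B
    let val = uint8x8x4_t(vdup_n_u8(1), vdup_n_u8(2), vdup_n_u8(3), vdup_n_u8(4));
    // ptr → Xn; writes 32 bytes, interleaved as 1,2,3,4,1,2,3,4,...
    unsafe { vst4_u8(out.as_mut_ptr(), val) };
}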

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
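
One more hedged example, assuming the 4S entry above corresponds to a float intrinsic such as vst4q_f32 (again an assumption; the table gives only the instruction form). Interleaving four q registers like this is a common way to turn planar data, for example separate R, G, B and A planes, into an interleaved layout:

#[cfg(target_arch = "aarch64")]
fn demo_vst4q_4s(out: &mut [f32; 16]) {
    use core::arch::aarch64::{float32x4x4_t, vdupq_n_f32, vst4q_f32};
    // val.val[0] → Vt.4S, val.val[1] → Vt2.4S, val.val[2] → Vt3.4S, val.val[3] → Vt4.4S
    let val = float32x4x4_t(
        vdupq_n_f32(0.1),
        vdupq_n_f32(0.2),
        vdupq_n_f32(0.3),
        vdupq_n_f32(0.4),
    );
    // ptr → Xn; writes 16 floats, interleaved as 0.1, 0.2, 0.3, 0.4, 0.1, ...
    unsafe { vst4q_f32(out.as_mut_ptr(), val) };
}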

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
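
As a hedged C sketch: the 64-bit element variant of this store has no lane interleaving to perform (each 1D register holds exactly one element), which is why the listing above shows ST1 over four 1D registers rather than an ST4 arrangement. The entry does not name the intrinsic; vst4_u64 is assumed here.

#include <arm_neon.h>
#include <stdint.h>

void store_four_u64(uint64_t *ptr, uint64x1x4_t val) {
    /* Writes val.val[0..3] to ptr[0..3]; with one element per register
       there is nothing to interleave, so ST1 {Vt.1D - Vt4.1D} suffices. */
    vst4_u64(ptr, val);
}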

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
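
A minimal C sketch for the q-form, 64-bit variant, which is listed here as A64 only. The intrinsic name is not stated in this entry; vst4q_u64 is assumed from the operand shapes (four 2D registers and a pointer).

#include <arm_neon.h>
#include <stdint.h>

#if defined(__aarch64__)
/* Interleaves the four 2D registers and writes 64 bytes starting at ptr. */
void store_four_u64x2(uint64_t *ptr, uint64x2x4_t val) {
    vst4q_u64(ptr, val);
}
#endif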

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
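
For illustration, a minimal C sketch of the 4H lane form, assuming it corresponds to the vld2_lane_u16 intrinsic (the entry does not name the intrinsic, and the same instruction also backs the s16/p16/f16 variants).

#include <arm_neon.h>
#include <stdint.h>

/* Loads two consecutive u16 values from ptr and inserts them into lane 2 of
   src.val[0] and src.val[1]; all other lanes are passed through unchanged.
   The lane index must be a constant in the range 0..3 for the 4H form. */
uint16x4x2_t load_pair_into_lane2(const uint16_t *ptr, uint16x4x2_t src) {
    return vld2_lane_u16(ptr, src, 2);
}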

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
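
A corresponding C sketch for the 2S form, assuming the vld2_lane_u32 intrinsic (again an assumption; the f32 variant maps to the same instruction).

#include <arm_neon.h>
#include <stdint.h>

/* Loads a 2-element structure (two u32) from ptr into lane 0 of both halves
   of the pair; the lane index must be 0 or 1 for the 2S arrangement. */
uint32x2x2_t load_pair_into_lane0(const uint32_t *ptr, uint32x2x2_t src) {
    return vld2_lane_u32(ptr, src, 0);
}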

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
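
A runnable C sketch of the byte-lane form, assuming it corresponds to vld2_lane_u8 (the entry does not name the intrinsic; the s8/p8 variants use the same instruction).

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t mem[2] = { 0xAA, 0xBB };
    uint8x8x2_t v;
    v.val[0] = vdup_n_u8(0);
    v.val[1] = vdup_n_u8(0);

    /* Inserts mem[0] into lane 5 of v.val[0] and mem[1] into lane 5 of
       v.val[1]; the remaining lanes keep their previous contents. */
    v = vld2_lane_u8(mem, v, 5);

    printf("%02x %02x\n", vget_lane_u8(v.val[0], 5), vget_lane_u8(v.val[1], 5));
    return 0;
}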

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
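
A C sketch of the A64-only 16B form, assuming the vld2q_lane_u8 intrinsic (not named in the entry). On the q registers the byte lane index may range over 0..15.

#include <arm_neon.h>
#include <stdint.h>

#if defined(__aarch64__)
/* Loads two bytes from ptr into lane 15 of src.val[0] and src.val[1]. */
uint8x16x2_t load_pair_into_lane15(const uint8_t *ptr, uint8x16x2_t src) {
    return vld2q_lane_u8(ptr, src, 15);
}
#endif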

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

ptr → Xn
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
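
A C sketch of the 1D lane form, assuming the vld2_lane_u64 intrinsic (an assumption; the s64/p64/f64 variants map to the same instruction). With 1D operands each register holds a single element, so the only valid lane index is 0.

#include <arm_neon.h>
#include <stdint.h>

#if defined(__aarch64__)
/* Still performs the interleaved 2-element load: ptr[0] goes to src.val[0]
   and ptr[1] to src.val[1], each replacing the register's only lane. */
uint64x1x2_t load_u64_pair(const uint64_t *ptr, uint64x1x2_t src) {
    return vld2_lane_u64(ptr, src, 0);
}
#endif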

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

ptr → Xn
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
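
For the LD3 lane forms that follow, the same Operation pseudocode runs with selem = 3, and the post-indexed variants additionally perform the writeback step at the end. A hedged generalisation of the sketch above, covering an arbitrary number of structure registers and the immediate post-index writeback (ldn_lane_model is an illustrative name, not a real intrinsic):

// Generalised model of the non-replicating Operation: load SELEM
// consecutive elements (one structure) and insert each into `lane` of
// the matching register; optionally post-index the base address.
fn ldn_lane_model<T: Copy, const LANES: usize, const SELEM: usize>(
    mem: &[T],                       // elements at the base address
    regs: &mut [[T; LANES]; SELEM],  // Vt .. V(t+SELEM-1)
    lane: usize,                     // must satisfy lane < LANES
    base: &mut usize,                // models Xn (or SP when n == 31)
    wback: bool,                     // post-index form?
) {
    let ebytes = core::mem::size_of::<T>();
    let mut offs = 0;
    for s in 0..SELEM {
        regs[s][lane] = mem[s];      // insert into one lane of V[t+s]
        offs += ebytes;
    }
    // Immediate post-index writeback; the register form (m != 31) uses
    // the value of Xm here instead of the accumulated offset.
    if wback {
        *base += offs;
    }
}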

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
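
Every Operation block in this listing also carries an "if replicate then" branch. That branch is not exercised by the lane forms documented here; it belongs to the load-and-replicate encodings (LD2R/LD3R), which broadcast each loaded element to every lane of its register. For contrast with the lane sketches above, a minimal model of that branch (again an illustrative name only):

// Model of the replicate branch: each loaded element fills every lane
// of its register (V[t] = Replicate(element, datasize DIV esize)).
fn ldn_replicate_model<T: Copy, const LANES: usize, const SELEM: usize>(
    mem: &[T],
    regs: &mut [[T; LANES]; SELEM],
) {
    for s in 0..SELEM {
        regs[s] = [mem[s]; LANES];
    }
}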

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
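
The lane bound quoted in each Argument Preparation block is simply (register width in bits) / (element size in bits) - 1: for the 4S form above that is 128/32 - 1 = 3, for the 8H forms 128/16 - 1 = 7, for the 16B forms 128/8 - 1 = 15, and for the 1D forms 64/64 - 1 = 0, which is why their only legal lane is 0.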

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+
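Likewise for the .2D (quad-register) form, a hedged C sketch assuming vld3q_lane_u64; the element type is again an assumption.

#include <arm_neon.h>

/* LD3 {Vt.2D - Vt3.2D}[1]: reads three consecutive uint64_t values from ptr
 * into lane 1 of src.val[0..2]; lane 0 of each register is unchanged. */
uint64x2x3_t ld3q_lane1_u64(const uint64_t *ptr, uint64x2x3_t src) {
    return vld3q_lane_u64(ptr, src, 1);  /* lane is a constant in 0..1 */
}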

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+
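Illustration only (not part of this patch): a minimal C sketch for the .4H arrangement, assuming the ACLE intrinsic vld4_lane_s16; the element type is an assumption, since the entry gives only the register arrangement.

#include <arm_neon.h>

/* LD4 {Vt.4H - Vt4.4H}[2]: reads four consecutive int16_t values from ptr
 * into lane 2 of src.val[0..3]; the remaining lanes are unchanged. */
int16x4x4_t ld4_lane2_s16(const int16_t *ptr, int16x4x4_t src) {
    return vld4_lane_s16(ptr, src, 2);  /* lane is a constant in 0..3 */
}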

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+
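For the .8H (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_s16; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.8H - Vt4.8H}[5]: reads four consecutive int16_t values from ptr
 * into lane 5 of src.val[0..3]; the other seven lanes are unchanged. */
int16x8x4_t ld4q_lane5_s16(const int16_t *ptr, int16x8x4_t src) {
    return vld4q_lane_s16(ptr, src, 5);  /* lane is a constant in 0..7 */
}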

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2S
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+
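Illustration only: a minimal C sketch for the .2S arrangement, assuming vld4_lane_f32 (the s32/u32 variants have the same shape); the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.2S - Vt4.2S}[1]: reads four consecutive float values from ptr
 * into lane 1 of src.val[0..3]; lane 0 of each register is unchanged. */
float32x2x4_t ld4_lane1_f32(const float32_t *ptr, float32x2x4_t src) {
    return vld4_lane_f32(ptr, src, 1);  /* lane is a constant in 0..1 */
}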

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4S
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+
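For the .4S (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_u32; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.4S - Vt4.4S}[3]: reads four consecutive uint32_t values from ptr
 * into lane 3 of src.val[0..3]; lanes 0..2 of each register are unchanged. */
uint32x4x4_t ld4q_lane3_u32(const uint32_t *ptr, uint32x4x4_t src) {
    return vld4q_lane_u32(ptr, src, 3);  /* lane is a constant in 0..3 */
}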

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2S
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4S
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2S
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4S
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8B
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+
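Illustration only: a minimal C sketch for the .8B arrangement, assuming vld4_lane_u8; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.8B - Vt4.8B}[4]: reads four consecutive uint8_t values from ptr
 * into lane 4 of src.val[0..3]; the other seven lanes are unchanged. */
uint8x8x4_t ld4_lane4_u8(const uint8_t *ptr, uint8x8x4_t src) {
    return vld4_lane_u8(ptr, src, 4);  /* lane is a constant in 0..7 */
}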

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8B
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8B
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.16B
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+
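For the .16B (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_u8; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.16B - Vt4.16B}[9]: reads four consecutive uint8_t values from ptr
 * into lane 9 of src.val[0..3]; the other fifteen lanes are unchanged. */
uint8x16x4_t ld4q_lane9_u8(const uint8_t *ptr, uint8x16x4_t src) {
    return vld4q_lane_u8(ptr, src, 9);  /* lane is a constant in 0..15 */
}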

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.16B
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.16B
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+
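Illustration only: a minimal C sketch for the .1D arrangement, assuming vld4_lane_u64; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.1D - Vt4.1D}[0]: reads four consecutive uint64_t values from ptr
 * into lane 0 of src.val[0..3]. */
uint64x1x4_t ld4_lane0_u64(const uint64_t *ptr, uint64x1x4_t src) {
    return vld4_lane_u64(ptr, src, 0);  /* lane must be the constant 0 */
}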

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+
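And for the .2D (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_u64; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.2D - Vt4.2D}[1]: reads four consecutive uint64_t values from ptr
 * into lane 1 of src.val[0..3]; lane 0 of each register is unchanged. */
uint64x2x4_t ld4q_lane1_u64(const uint64_t *ptr, uint64x2x4_t src) {
    return vld4q_lane_u64(ptr, src, 1);  /* lane is a constant in 0..1 */
}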

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
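
A minimal C sketch, assuming this entry corresponds to the ACLE intrinsic vst2_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the byte pair held in lane 3 of the two .8B registers in val;
 * exactly 2 bytes are written to ptr (the MemOp_STORE branch above). */
void store_pair_from_lane3(uint8_t *ptr, uint8x8x2_t val)
{
    vst2_lane_u8(ptr, val, 3); /* 0 <= lane <= 7 */
}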

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
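
A minimal C sketch, assuming the ACLE intrinsic vst3_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the 3-byte structure held in lane 5 of the three .8B registers
 * in val; exactly 3 bytes are written to ptr. */
void store_triple_from_lane5(uint8_t *ptr, uint8x8x3_t val)
{
    vst3_lane_u8(ptr, val, 5); /* 0 <= lane <= 7 */
}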

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
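
A minimal C sketch, assuming the ACLE intrinsic vst4_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the 4-byte structure held in lane 2 of the four .8B registers
 * in val; exactly 4 bytes are written to ptr. */
void store_quad_from_lane2(uint8_t *ptr, uint8x8x4_t val)
{
    vst4_lane_u8(ptr, val, 2); /* 0 <= lane <= 7 */
}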

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
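
A minimal C sketch, assuming the ACLE intrinsic vst2_lane_u16 from arm_neon.h:

#include <arm_neon.h>

/* Store the halfword pair held in lane 2 of the two .4H registers in
 * val; exactly 4 bytes (two 16-bit elements) are written to ptr. */
void store_pair_from_lane2(uint16_t *ptr, uint16x4x2_t val)
{
    vst2_lane_u16(ptr, val, 2); /* 0 <= lane <= 3 */
}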

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
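
A minimal C sketch, assuming the ACLE intrinsic vst2_lane_u32 from arm_neon.h:

#include <arm_neon.h>

/* Store the word pair held in lane 1 of the two .2S registers in val;
 * exactly 8 bytes (two 32-bit elements) are written to ptr. */
void store_pair_from_lane1(uint32_t *ptr, uint32x2x2_t val)
{
    vst2_lane_u32(ptr, val, 1); /* 0 <= lane <= 1 */
}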

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
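
A minimal C sketch for the 128-bit register form, assuming the A64-only ACLE intrinsic vst2q_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the byte pair held in lane 9 of the two .16B registers in val;
 * exactly 2 bytes are written to ptr. A64 only. */
void store_pair_from_lane9(uint8_t *ptr, uint8x16x2_t val)
{
    vst2q_lane_u8(ptr, val, 9); /* 0 <= lane <= 15 */
}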

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
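
A minimal C sketch, assuming the A64-only ACLE intrinsic vst2_lane_u64 from arm_neon.h; with .1D registers the only valid lane is 0:

#include <arm_neon.h>

/* Store the doubleword pair held in lane 0 of the two .1D registers in
 * val; exactly 16 bytes are written to ptr. A64 only. */
void store_pair_from_lane0(uint64_t *ptr, uint64x1x2_t val)
{
    vst2_lane_u64(ptr, val, 0); /* lane must be 0 */
}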

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
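
And the 128-bit register counterpart, assuming the A64-only ACLE intrinsic vst2q_lane_u64 from arm_neon.h:

#include <arm_neon.h>

/* Store the doubleword pair held in lane 1 of the two .2D registers in
 * val; exactly 16 bytes are written to ptr. A64 only. */
void store_qpair_from_lane1(uint64_t *ptr, uint64x2x2_t val)
{
    vst2q_lane_u64(ptr, val, 1); /* 0 <= lane <= 1 */
}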

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
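
The extract above does not show which C intrinsic this entry belongs to; judging by the Argument Preparation block (two .2D registers, 0 <= lane <= 1) it is one of the 64-bit ST2 lane-store intrinsics. A minimal, hedged C sketch assuming the AArch64-only ACLE intrinsic vst2q_lane_f64 (the function and buffer names below are illustrative, not from the source):

#include <arm_neon.h>

/* Store lane 1 of a de-interleaved pair as the 2-element structure
 * {v.val[0][1], v.val[1][1]} at out[0..1]. AArch64 only. */
void store_pair_lane1(float64_t out[2]) {
    float64x2x2_t v;
    v.val[0] = vdupq_n_f64(1.0);  /* maps to Vt.2D  */
    v.val[1] = vdupq_n_f64(2.0);  /* maps to Vt2.2D */
    vst2q_lane_f64(out, v, 1);    /* lane must be a constant in 0..1 */
}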

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
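
The intrinsic name is omitted from this extract; the .4H arrangement with 0 <= lane <= 3 matches the 16-bit ST3 lane-store intrinsics. A sketch assuming the ACLE intrinsic vst3_lane_s16 (function and buffer names below are illustrative):

#include <arm_neon.h>

/* Store the 3-element structure taken from lane 2 of three 4x16-bit
 * vectors to out[0..2] (6 bytes in total). */
void store_triple_lane2(int16_t out[3]) {
    int16x4x3_t v;
    v.val[0] = vdup_n_s16(10);  /* Vt.4H  */
    v.val[1] = vdup_n_s16(20);  /* Vt2.4H */
    v.val[2] = vdup_n_s16(30);  /* Vt3.4H */
    vst3_lane_s16(out, v, 2);   /* lane must be a constant in 0..3 */
}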

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
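
For the .4S form (0 <= lane <= 3) a plausible counterpart is vst3q_lane_f32; the sketch below only illustrates the val[i] → Vt/Vt2/Vt3 mapping described above and uses made-up names:

#include <arm_neon.h>

/* Write {a[lane], b[lane], c[lane]} contiguously at out. */
void store_rgb_lane3(float32_t out[3], float32x4_t a, float32x4_t b, float32x4_t c) {
    float32x4x3_t v = { { a, b, c } };  /* val[0]=Vt.4S, val[1]=Vt2.4S, val[2]=Vt3.4S */
    vst3q_lane_f32(out, v, 3);          /* lane must be a constant in 0..3 */
}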

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
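
The .16B form allows any of the 16 byte lanes (0 <= lane <= 15). Assuming the entry corresponds to a q-form 8-bit intrinsic such as vst3q_lane_s8 (an assumption, since the name is not shown in the extract), a short sketch:

#include <arm_neon.h>

/* Byte variant: 16 lanes per register. Stores the three bytes
 * {v.val[0][15], v.val[1][15], v.val[2][15]} at out[0..2]. */
void store_triple_byte_lane15(int8_t out[3], int8x16x3_t v) {
    vst3q_lane_s8(out, v, 15);  /* lane must be a constant in 0..15 */
}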

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
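
The .1D form has a single lane, so the only valid lane index is 0, and the entry above is listed as A64-only. A sketch assuming the intrinsic is vst3_lane_f64 (illustrative names, not from the source):

#include <arm_neon.h>

/* AArch64 only: store {a[0], b[0], c[0]} as a 3-element structure at out. */
void store_triple_d(float64_t out[3], float64x1_t a, float64x1_t b, float64x1_t c) {
    float64x1x3_t v = { { a, b, c } };
    vst3_lane_f64(out, v, 0);  /* 0 is the only permitted lane for .1D */
}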

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
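
The ST4 lane-store entries follow the same pattern with a fourth register. Assuming the .4H entry above corresponds to vst4_lane_s16 (names below are illustrative):

#include <arm_neon.h>

/* Store the 4-element structure taken from lane 0 of four 4x16-bit
 * vectors to out[0..3] (8 bytes in total). */
void store_quad_lane0(int16_t out[4]) {
    int16x4x4_t v;
    v.val[0] = vdup_n_s16(1);  /* Vt.4H  */
    v.val[1] = vdup_n_s16(2);  /* Vt2.4H */
    v.val[2] = vdup_n_s16(3);  /* Vt3.4H */
    v.val[3] = vdup_n_s16(4);  /* Vt4.4H */
    vst4_lane_s16(out, v, 0);  /* lane must be a constant in 0..3 */
}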

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
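
As with the 16-bit form, this 32-bit single-lane entry does not name the C intrinsic it documents; assuming it maps to a 32-bit vst4_lane intrinsic such as vst4_lane_f32 (chosen only for illustration), the call looks like:

#include <arm_neon.h>

void store_lane0_of_f32x2x4(float32_t *ptr, float32x2x4_t val)
{
    /* val.val[0..3] -> Vt..Vt4.2S; with 2S vectors the lane must be 0 or 1 */
    vst4_lane_f32(ptr, val, 0);   /* corresponds to ST4 {Vt.s - Vt4.s}[0],[Xn] above */
}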

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
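
This byte-lane form is listed as A64 only. Assuming it corresponds to a 128-bit byte-element vst4q_lane intrinsic such as vst4q_lane_s8 (a representative choice, not named in the entry), a usage sketch:

#include <arm_neon.h>

void store_lane5_of_s8x16x4(int8_t *ptr, int8x16x4_t val)
{
    /* val.val[0..3] -> Vt..Vt4.16B; the lane argument must be 0..15 */
    vst4q_lane_s8(ptr, val, 5);   /* corresponds to ST4 {Vt.b - Vt4.b}[5],[Xn] above */
}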

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
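
For the 64-bit element form the only legal lane index is 0. Assuming the entry corresponds to a 64-bit vst4_lane intrinsic such as vst4_lane_s64 (again a representative name, not given in the entry), a usage sketch:

#include <arm_neon.h>

void store_s64x1x4(int64_t *ptr, int64x1x4_t val)
{
    /* val.val[0..3] -> Vt..Vt4.1D; with 1D vectors the lane must be 0 */
    vst4_lane_s64(ptr, val, 0);   /* corresponds to ST4 {Vt.d - Vt4.d}[0],[Xn] above */
}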

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
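
Unlike the ST4 lane entries above, this form stores every element of two whole registers. Assuming it corresponds to an x2 variant of vst1 such as vst1_u8_x2 from <arm_neon.h> (a representative choice; the entry does not name the intrinsic), a usage sketch:

#include <arm_neon.h>

void store_u8x8x2(uint8_t *ptr, uint8x8x2_t val)
{
    /* val.val[0] -> Vt.8B, val.val[1] -> Vt2.8B; 16 bytes are written in total */
    vst1_u8_x2(ptr, val);         /* corresponds to ST1 {Vt.8B - Vt2.8B},[Xn] above */
}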

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
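
For the 2S arrangement the same pattern holds with 32-bit lanes. A hedged sketch, assuming the single-precision float variant vst1_f32_x2 (the signed and unsigned 32-bit integer variants would look identical apart from the types):

#include <arm_neon.h>

/* Store two 2-lane single-precision vectors: 4 floats (16 bytes) at ptr. */
void store_f32x2_pair(float32_t *ptr /* 4 writable floats */) {
    float32x2x2_t val;
    val.val[0] = vdup_n_f32(1.0f); /* -> Vt.2S  */
    val.val[1] = vdup_n_f32(2.0f); /* -> Vt2.2S */
    vst1_f32_x2(ptr, val);         /* ST1 {Vt.2S - Vt2.2S}, [Xn] */
}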

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
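
A common use of the two-register form is a straight 16-byte block copy: load a pair with the matching vld1_*_x2 intrinsic and store it again. This sketch assumes the unsigned-byte variants vld1_u8_x2/vst1_u8_x2; the element type of this particular entry is not shown in the excerpt:

#include <arm_neon.h>
#include <stdint.h>

/* Copy 16 bytes using one two-register load and one two-register store. */
void copy16(uint8_t *dst, const uint8_t *src) {
    uint8x8x2_t val = vld1_u8_x2(src); /* LD1 {Vt.8B - Vt2.8B}, [Xn] */
    vst1_u8_x2(dst, val);              /* ST1 {Vt.8B - Vt2.8B}, [Xn] */
}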

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
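
The 1D arrangement holds one 64-bit element per D register, so the pair covers 16 bytes. A sketch assuming the unsigned 64-bit variant vst1_u64_x2 (the entry could equally be the signed variant):

#include <arm_neon.h>
#include <stdint.h>

/* Store two 1x64-bit vectors to 16 contiguous bytes at ptr. */
void store_u64x1_pair(uint64_t *ptr /* 2 writable uint64_t */) {
    uint64x1x2_t val;
    val.val[0] = vdup_n_u64(0xAAAAAAAAAAAAAAAAull); /* -> Vt.1D  */
    val.val[1] = vdup_n_u64(0x5555555555555555ull); /* -> Vt2.1D */
    vst1_u64_x2(ptr, val);       /* ST1 {Vt.1D - Vt2.1D}, [Xn] */
}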

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64
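
The A32/A64-only architecture row suggests, although this excerpt does not say so, that this 1D entry is the poly64 variant, which is not available on Armv7. A sketch under that assumption, using vst1_p64_x2 (the poly64 types are normally only available when the Armv8 crypto extensions are enabled):

#include <arm_neon.h>

/* Store a pair of 64-bit polynomial vectors to 16 contiguous bytes. */
void store_p64x1_pair(poly64_t *ptr /* 2 writable poly64_t */, poly64x1x2_t val) {
    vst1_p64_x2(ptr, val); /* ST1 {Vt.1D - Vt2.1D}, [Xn] */
}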

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
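
With the 2D arrangement each Q register holds two 64-bit lanes, so the pair stores 32 bytes. A sketch assuming the unsigned 64-bit variant vst1q_u64_x2:

#include <arm_neon.h>
#include <stdint.h>

/* Store two 2x64-bit Q registers: 32 contiguous bytes at ptr. */
void store_u64x2_pair(uint64_t *ptr /* 4 writable uint64_t */,
                      uint64x2_t lo, uint64x2_t hi) {
    uint64x2x2_t val;
    val.val[0] = lo;        /* -> Vt.2D  */
    val.val[1] = hi;        /* -> Vt2.2D */
    vst1q_u64_x2(ptr, val); /* ST1 {Vt.2D - Vt2.2D}, [Xn] */
}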

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
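
An A64-only 1D pair store is most plausibly the double-precision float variant, though this excerpt does not name it. A sketch assuming vst1_f64_x2, which exists only in the AArch64 form of ACLE:

#include <arm_neon.h>

/* Store two 1x double-precision vectors: 2 doubles (16 bytes) at ptr. */
void store_f64x1_pair(float64_t *ptr /* 2 writable doubles */) {
    float64x1x2_t val;
    val.val[0] = vdup_n_f64(1.0); /* -> Vt.1D  */
    val.val[1] = vdup_n_f64(2.0); /* -> Vt2.1D */
    vst1_f64_x2(ptr, val);        /* ST1 {Vt.1D - Vt2.1D}, [Xn] */
}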

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
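
Likewise, the A64-only 2D pair is presumably the float64x2 variant. A sketch assuming vst1q_f64_x2:

#include <arm_neon.h>

/* Store two 2x double-precision Q registers: 4 doubles (32 bytes) at ptr. */
void store_f64x2_pair(float64_t *ptr /* 4 writable doubles */,
                      float64x2_t a, float64x2_t b) {
    float64x2x2_t val;
    val.val[0] = a;         /* -> Vt.2D  */
    val.val[1] = b;         /* -> Vt2.2D */
    vst1q_f64_x2(ptr, val); /* ST1 {Vt.2D - Vt2.2D}, [Xn] */
}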

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
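
From here the entries move to the three-register form, ST1 {Vt - Vt3}, whose argument structure carries a third val.val[2] member. Unlike ST3, the three registers are stored back to back without interleaving. A sketch assuming the unsigned-byte variant vst1_u8_x3:

#include <arm_neon.h>
#include <stdint.h>

/* Store three 8-byte D registers: 24 contiguous bytes at ptr. */
void store_u8x8_triple(uint8_t *ptr /* 24 writable bytes */) {
    uint8x8x3_t val;
    val.val[0] = vdup_n_u8(0x11); /* -> Vt.8B  */
    val.val[1] = vdup_n_u8(0x22); /* -> Vt2.8B */
    val.val[2] = vdup_n_u8(0x33); /* -> Vt3.8B */
    vst1_u8_x3(ptr, val);         /* ST1 {Vt.8B - Vt3.8B}, [Xn] */
}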

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
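
The Q-register triple covers 48 bytes, which makes a convenient block copy when paired with the matching load. A sketch assuming vld1q_u8_x3 and vst1q_u8_x3:

#include <arm_neon.h>
#include <stdint.h>

/* Copy 48 bytes with one three-register load and one three-register store. */
void copy48(uint8_t *dst, const uint8_t *src) {
    uint8x16x3_t val = vld1q_u8_x3(src); /* LD1 {Vt.16B - Vt3.16B}, [Xn] */
    vst1q_u8_x3(dst, val);               /* ST1 {Vt.16B - Vt3.16B}, [Xn] */
}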

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
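
The 2S triple stores six 32-bit lanes (24 bytes). A sketch assuming the unsigned 32-bit variant vst1_u32_x3; the signed and float variants differ only in the types:

#include <arm_neon.h>
#include <stdint.h>

/* Store three 2x32-bit D registers: 24 contiguous bytes at ptr. */
void store_u32x2_triple(uint32_t *ptr /* 6 writable uint32_t */) {
    uint32x2x3_t val;
    val.val[0] = vdup_n_u32(1); /* -> Vt.2S  */
    val.val[1] = vdup_n_u32(2); /* -> Vt2.2S */
    val.val[2] = vdup_n_u32(3); /* -> Vt3.2S */
    vst1_u32_x3(ptr, val);      /* ST1 {Vt.2S - Vt3.2S}, [Xn] */
}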

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
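
For the 4S triple (twelve 32-bit lanes, 48 bytes), a sketch assuming the single-precision variant vst1q_f32_x3; the integer variants are analogous:

#include <arm_neon.h>

/* Store three 4x float Q registers: 12 floats (48 bytes) at ptr. */
void store_f32x4_triple(float32_t *ptr /* 12 writable floats */) {
    float32x4x3_t val;
    val.val[0] = vdupq_n_f32(1.0f); /* -> Vt.4S  */
    val.val[1] = vdupq_n_f32(2.0f); /* -> Vt2.4S */
    val.val[2] = vdupq_n_f32(3.0f); /* -> Vt3.4S */
    vst1q_f32_x3(ptr, val);         /* ST1 {Vt.4S - Vt3.4S}, [Xn] */
}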

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
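
As a rough C model of the MemOp_STORE path of the Operation pseudocode above (a sketch only; st1_store_model, the fixed-size regs array and the loose use of selem, ebytes and index are simplifications for illustration, not part of any real API):

#include <stdint.h>
#include <string.h>

/* One element of ebytes bytes is taken from lane `index` of each of the
   selem registers in turn, register numbers wrapping modulo 32, and
   written to consecutive memory. The return value is the final offs,
   i.e. what the optional write-back form would add to the base register. */
static uint64_t st1_store_model(uint8_t *address, uint8_t regs[32][16],
                                unsigned t, unsigned index,
                                unsigned ebytes, unsigned selem)
{
    uint64_t offs = 0;
    for (unsigned s = 0; s < selem; s++) {
        /* Mem[address + offs, ebytes] = Elem[V[t], index, esize] */
        memcpy(address + offs, &regs[t][index * ebytes], ebytes);
        offs += ebytes;
        t = (t + 1) % 32;
    }
    return offs;
}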

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
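
A short C sketch of the 64-bit (D) register form, assuming <arm_neon.h> and taking vst1_u8_x3 as a representative intrinsic for this layout:

#include <arm_neon.h>

/* 64-bit (D) register form: 3 x 8 bytes = 24 bytes stored at ptr. */
void store_three_u8(uint8_t *ptr, uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
    uint8x8x3_t val = { { a, b, c } };   /* val.val[0..2] -> Vt..Vt3.8B */
    vst1_u8_x3(ptr, val);                /* ST1 {Vt.8B - Vt3.8B}, [Xn] */
}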

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
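
A minimal C sketch for the 1D form, assuming an AArch64 target and <arm_neon.h>; vst1_f64_x3 is chosen here because the double-precision variant is AArch64-only, which fits the A64 note above, but it is an assumption rather than the only intrinsic that uses this layout:

#include <arm_neon.h>

/* 1D form: one 64-bit element per D register, 24 bytes in total. */
void store_three_f64(float64_t *ptr, float64x1x3_t val)
{
    vst1_f64_x3(ptr, val);   /* ST1 {Vt.1D - Vt3.1D}, [Xn] */
}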

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
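
For the four-register form, a minimal C sketch assuming <arm_neon.h>, with vst1_s8_x4 taken as a representative of the *_x4 intrinsics that prepare val.val[0..3] as shown above:

#include <arm_neon.h>

/* Four-register variant: val.val[0..3] -> Vt..Vt4.8B, so
   4 x 8 bytes = 32 bytes are stored contiguously at ptr. */
void store_four_s8(int8_t *ptr, int8x8x4_t val)
{
    vst1_s8_x4(ptr, val);   /* ST1 {Vt.8B - Vt4.8B}, [Xn] */
}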

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
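
A usage sketch for the 4S four-register form (assuming <arm_neon.h>); unlike vst4q_f32, which interleaves elements across the four registers, vst1q_f32_x4 writes each register back to back:

#include <arm_neon.h>

/* Gathers four independent Q registers into the x4 structure and stores
   4 x 16 = 64 bytes with a single instruction. */
void store_four_rows(float32_t *dst, float32x4_t r0, float32x4_t r1,
                     float32x4_t r2, float32x4_t r3)
{
    float32x4x4_t val;
    val.val[0] = r0;           /* -> Vt.4S  */
    val.val[1] = r1;           /* -> Vt2.4S */
    val.val[2] = r2;           /* -> Vt3.4S */
    val.val[3] = r3;           /* -> Vt4.4S */
    vst1q_f32_x4(dst, val);    /* ST1 {Vt.4S - Vt4.4S}, [Xn] */
}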

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
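
Under the same assumption, the LD1 two-register entries map onto the vld1_*_x2 family; vld1_s8_x2 below is an assumed concrete example, with result.val[0] and result.val[1] corresponding to the fields of the returned register pair:

// Sketch only: loads two 8B NEON registers' worth of i8 values from `src`.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_pair_i8(
    src: *const i8,
) -> (core::arch::aarch64::int8x8_t, core::arch::aarch64::int8x8_t) {
    use core::arch::aarch64::vld1_s8_x2;
    // LD1 {Vt.8B - Vt2.8B},[Xn]: Vt.8B -> result.val[0], Vt2.8B -> result.val[1]
    let r = unsafe { vld1_s8_x2(src) };
    (r.0, r.1)
}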

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
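
A rough usage sketch for the LD1 {Vt.2D - Vt2.2D},[Xn] form documented above, assuming the ACLE vld1q_u64_x2 intrinsic (the excerpt does not show which intrinsic this entry belongs to, so this is just one representative type).

#include <arm_neon.h>

// Assumption: vld1q_u64_x2 maps onto LD1 {Vt.2D - Vt2.2D},[Xn]; four u64 values fill two Q registers.
uint64_t sum4(const uint64_t buf[4]) {
    uint64x2x2_t r = vld1q_u64_x2(buf);            // buf[0..1] -> val[0], buf[2..3] -> val[1]
    uint64x2_t s = vaddq_u64(r.val[0], r.val[1]);  // lane-wise add of the two registers
    return vgetq_lane_u64(s, 0) + vgetq_lane_u64(s, 1);
}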

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
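
Since this entry is marked A64-only, a plausible instance is the double-precision form; the sketch below assumes the ACLE vld1q_f64_x2 intrinsic from <arm_neon.h> (not named in the excerpt) and recent GCC/Clang support for the _x2 loads.

#include <arm_neon.h>

// Assumption: vld1q_f64_x2 is one A64-only intrinsic behind LD1 {Vt.2D - Vt2.2D},[Xn].
float64x2_t add_halves(const double buf[4]) {
    float64x2x2_t r = vld1q_f64_x2(buf);   // buf[0..1] -> val[0], buf[2..3] -> val[1]
    return vaddq_f64(r.val[0], r.val[1]);  // element-wise sum of the two halves
}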

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
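
A short sketch of the three-register byte form above, assuming the ACLE vld1_s8_x3 intrinsic from <arm_neon.h>; the exact intrinsic for this entry is not shown in the excerpt.

#include <arm_neon.h>

// Assumption: vld1_s8_x3 lowers to LD1 {Vt.8B - Vt3.8B},[Xn]; 24 bytes spread over three D registers.
int8x8_t fold3(const int8_t buf[24]) {
    int8x8x3_t r = vld1_s8_x3(buf);        // 8 bytes per r.val[i]
    return vadd_s8(vadd_s8(r.val[0], r.val[1]), r.val[2]);
}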

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
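
For the 16-byte, three-register form above, a minimal sketch assuming the ACLE vld1q_u8_x3 intrinsic; the horizontal add used to consume the result (vaddvq_u8) is A64-only, while the load form itself is listed for v7/A32/A64.

#include <arm_neon.h>

// Assumption: vld1q_u8_x3 maps onto LD1 {Vt.16B - Vt3.16B},[Xn]; 48 bytes into three Q registers.
unsigned sum48(const uint8_t buf[48]) {
    uint8x16x3_t r = vld1q_u8_x3(buf);
    return vaddvq_u8(r.val[0]) + vaddvq_u8(r.val[1]) + vaddvq_u8(r.val[2]);
}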

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
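
A sketch of the halfword variant above, assuming the ACLE vld1_u16_x3 intrinsic from <arm_neon.h> as one plausible match for this entry.

#include <arm_neon.h>

// Assumption: vld1_u16_x3 lowers to LD1 {Vt.4H - Vt3.4H},[Xn]; twelve u16 values, four per D register.
uint16x4_t fold_4h(const uint16_t buf[12]) {
    uint16x4x3_t r = vld1_u16_x3(buf);     // buf[0..3], buf[4..7], buf[8..11]
    return vadd_u16(vadd_u16(r.val[0], r.val[1]), r.val[2]);
}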

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
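
The 128-bit halfword form above, sketched with the ACLE vld1q_s16_x3 intrinsic (one representative element type; the entry's own intrinsic name is not shown here).

#include <arm_neon.h>

// Assumption: vld1q_s16_x3 maps onto LD1 {Vt.8H - Vt3.8H},[Xn]; 24 halfwords into three Q registers.
int16x8_t fold_8h(const int16_t buf[24]) {
    int16x8x3_t r = vld1q_s16_x3(buf);
    return vaddq_s16(vaddq_s16(r.val[0], r.val[1]), r.val[2]);
}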

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
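
For the two-lane single-precision form above, a minimal sketch assuming the ACLE vld1_f32_x3 intrinsic from <arm_neon.h>; names are illustrative only.

#include <arm_neon.h>

// Assumption: vld1_f32_x3 lowers to LD1 {Vt.2S - Vt3.2S},[Xn]; six floats, two per D register.
float32x2_t fold_2s(const float buf[6]) {
    float32x2x3_t r = vld1_f32_x3(buf);    // {buf[0],buf[1]}, {buf[2],buf[3]}, {buf[4],buf[5]}
    return vadd_f32(vadd_f32(r.val[0], r.val[1]), r.val[2]);
}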

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
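
A usage sketch for the four-lane single-precision form above, assuming the ACLE vld1q_f32_x3 intrinsic; it loads twelve floats in one instruction and then sums the three rows element-wise.

#include <arm_neon.h>

// Assumption: vld1q_f32_x3 maps onto LD1 {Vt.4S - Vt3.4S},[Xn]; three rows of four floats.
void sum_rows(const float in[12], float out[4]) {
    float32x4x3_t r = vld1q_f32_x3(in);
    float32x4_t s = vaddq_f32(vaddq_f32(r.val[0], r.val[1]), r.val[2]);
    vst1q_f32(out, s);                     // store the per-lane sums
}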

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
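
The three-register, 64-bit-lane form above, sketched with the ACLE vld1_s64_x3 intrinsic from <arm_neon.h>; the excerpt does not name the entry's intrinsic, so this is one plausible instance.

#include <arm_neon.h>

// Assumption: vld1_s64_x3 lowers to LD1 {Vt.1D - Vt3.1D},[Xn]; three 64-bit values, one per D register.
int64_t sum3(const int64_t buf[3]) {
    int64x1x3_t r = vld1_s64_x3(buf);
    int64x1_t s = vadd_s64(vadd_s64(r.val[0], r.val[1]), r.val[2]);
    return vget_lane_s64(s, 0);
}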

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
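
Finally, a sketch of the three-register, 128-bit form above, assuming the ACLE vld1q_u64_x3 intrinsic (again only one representative type for an entry whose intrinsic name is not shown here).

#include <arm_neon.h>

// Assumption: vld1q_u64_x3 maps onto LD1 {Vt.2D - Vt3.2D},[Xn]; six u64 values into three Q registers.
uint64_t sum6(const uint64_t buf[6]) {
    uint64x2x3_t r = vld1q_u64_x3(buf);
    uint64x2_t s = vaddq_u64(vaddq_u64(r.val[0], r.val[1]), r.val[2]);
    return vgetq_lane_u64(s, 0) + vgetq_lane_u64(s, 1);
}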

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
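
For the four-register variant the only difference visible from C is the wider return structure. A minimal sketch, assuming vld1_u8_x4 is provided by <arm_neon.h>; the helper name load_four_d_regs is made up for illustration.

#include <arm_neon.h>
#include <stdint.h>

/* Loads 32 consecutive bytes; val[0]..val[3] mirror the Results mapping
   above (Vt.8B -> val[0], ..., Vt4.8B -> val[3]). */
uint8x8x4_t load_four_d_regs(const uint8_t *ptr /* -> Xn */) {
    return vld1_u8_x4(ptr);
}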

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
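
A worked example of the pairwise semantics described above, using the long-standing vpadd_s8 intrinsic from <arm_neon.h>, which targets this ADDP encoding on AArch64 (and VPADD.I8 on A32); the input values are arbitrary.

#include <arm_neon.h>
#include <stdint.h>

void addp_demo(void) {
    int8_t av[8] = {1, 2, 3, 4, 5, 6, 7, 8};        /* -> Vn.8B */
    int8_t bv[8] = {10, 20, 30, 40, 50, 60, 7, 8};  /* -> Vm.8B */
    int8x8_t a = vld1_s8(av);
    int8x8_t b = vld1_s8(bv);

    /* concat = b:a, then adjacent pairs are summed:
       r = {1+2, 3+4, 5+6, 7+8, 10+20, 30+40, 50+60, 7+8}
         = {3, 7, 11, 15, 30, 70, 110, 15}                 */
    int8x8_t r = vpadd_s8(a, b);
    (void)r;
}

Note that the low half of the destination holds the pair sums of the first source and the high half those of the second, exactly as the operand2:operand1 concatenation in the pseudocode implies.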

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
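
A scalar sketch of the FADDP 2S form above, assuming default round-to-nearest so that plain f32 addition stands in for FPAdd with FPCR; the name faddp_f32x2 is illustrative only:

fn faddp_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    // concatenate a (Vn) then b (Vm) and add each adjacent pair
    let concat = [a[0], a[1], b[0], b[1]];
    [concat[0] + concat[1], concat[2] + concat[3]]
}

fn main() {
    assert_eq!(faddp_f32x2([1.0, 2.0], [3.0, 4.0]), [3.0, 7.0]);
}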

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.4H,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
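
A scalar sketch of SADDLP Vd.4H, Vn.8B: each pair of signed bytes is widened to 16 bits before the add, so the pairwise sum cannot overflow. The helper name saddlp_i8x8 is hypothetical:

fn saddlp_i8x8(a: [i8; 8]) -> [i16; 4] {
    let mut result = [0i16; 4];
    for e in 0..4 {
        // sign-extend both halves of the pair, then add in the wider type
        result[e] = a[2 * e] as i16 + a[2 * e + 1] as i16;
    }
    result
}

fn main() {
    // 127 + 1 and -128 + -1 are representable only in the widened lanes
    assert_eq!(saddlp_i8x8([127, 1, -128, -1, 3, 4, -5, 6]), [128, -129, 7, 1]);
}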

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.8H,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.2S,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.4S,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.2D,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.4H,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
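
The UADDLP form differs from SADDLP only in treating the source elements as unsigned; a scalar sketch (uaddlp_u8x8 is an illustrative name):

fn uaddlp_u8x8(a: [u8; 8]) -> [u16; 4] {
    let mut result = [0u16; 4];
    for e in 0..4 {
        // zero-extend both bytes of the pair before adding
        result[e] = a[2 * e] as u16 + a[2 * e + 1] as u16;
    }
    result
}

fn main() {
    assert_eq!(uaddlp_u8x8([255, 255, 1, 2, 3, 4, 5, 6]), [510, 3, 7, 11]);
}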

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.8H,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.2S,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.4S,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.2D,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.4H,Vn.8B
+

Argument Preparation

a → Vd.4H 
+b → Vn.8B

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
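
SADALP adds the widened pairwise sums into the existing destination lanes (the `acc` path in the pseudocode). A scalar sketch, with `acc` standing for Vd and `b` for Vn; the helper name is made up:

fn sadalp_i8x8(acc: [i16; 4], b: [i8; 8]) -> [i16; 4] {
    let mut result = acc;
    for e in 0..4 {
        let sum = b[2 * e] as i16 + b[2 * e + 1] as i16;
        // the pairwise sum is accumulated into the existing destination lane
        result[e] = result[e].wrapping_add(sum);
    }
    result
}

fn main() {
    assert_eq!(
        sadalp_i8x8([100, -100, 0, 5], [1, 2, -3, -4, 5, 6, 7, 8]),
        [103, -107, 11, 20]
    );
}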

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.8H,Vn.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.2S,Vn.4H
+

Argument Preparation

a → Vd.2S 
+b → Vn.4H

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.4S,Vn.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.1D,Vn.2S
+

Argument Preparation

a → Vd.1D 
+b → Vn.2S

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.2D,Vn.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.4H,Vn.8B
+

Argument Preparation

a → Vd.4H 
+b → Vn.8B

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
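
UADALP is the unsigned counterpart of SADALP; the accumulation into the wider destination lanes wraps on overflow. A scalar sketch with an illustrative name:

fn uadalp_u8x8(acc: [u16; 4], b: [u8; 8]) -> [u16; 4] {
    let mut result = acc;
    for e in 0..4 {
        let sum = b[2 * e] as u16 + b[2 * e + 1] as u16;
        result[e] = result[e].wrapping_add(sum);
    }
    result
}

fn main() {
    // the last lane shows 16-bit wrap-around: 65535 + (0 + 1) -> 0
    assert_eq!(
        uadalp_u8x8([1000, 0, 7, 65535], [10, 20, 30, 40, 1, 2, 0, 1]),
        [1030, 70, 10, 0]
    );
}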

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.8H,Vn.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.2S,Vn.4H
+

Argument Preparation

a → Vd.2S 
+b → Vn.4H

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.4S,Vn.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.1D,Vn.2S
+

Argument Preparation

a → Vd.1D 
+b → Vn.2S

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.2D,Vn.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
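
A scalar sketch of SMAXP Vd.8B: the two sources are concatenated (Vn low, Vm high) and the larger of each adjacent signed pair is kept. The helper name smaxp_i8x8 is hypothetical:

fn smaxp_i8x8(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut concat = [0i8; 16];
    concat[..8].copy_from_slice(&a);
    concat[8..].copy_from_slice(&b);
    let mut result = [0i8; 8];
    for e in 0..8 {
        // keep the larger value of each adjacent signed pair
        result[e] = concat[2 * e].max(concat[2 * e + 1]);
    }
    result
}

fn main() {
    let a = [1i8, -2, 3, 4, -5, -6, 7, 0];
    let b = [9i8, 8, -7, -6, 5, 4, -3, -2];
    assert_eq!(smaxp_i8x8(a, b), [1, 4, -5, 7, 9, -6, 5, -2]);
}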

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
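
UMAXP follows the same shape with unsigned comparisons; a compact scalar sketch that pairs up `a` (Vn) into the low half of the result and `b` (Vm) into the high half — equivalent to indexing the concatenated vector. The name is illustrative:

fn umaxp_u8x8(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for e in 0..4 {
        result[e] = a[2 * e].max(a[2 * e + 1]);
        result[e + 4] = b[2 * e].max(b[2 * e + 1]);
    }
    result
}

fn main() {
    let a = [1u8, 200, 3, 4, 5, 6, 7, 0];
    let b = [9u8, 8, 255, 6, 5, 4, 3, 2];
    assert_eq!(umaxp_u8x8(a, b), [200, 4, 6, 7, 9, 255, 5, 3]);
}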

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
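
A rough scalar sketch of FMAXP 2S; note that Rust's f32::max ignores a NaN operand, whereas the FPMax used in the pseudocode follows the FPCR-controlled NaN rules, so this only models the ordinary numeric case (the name is made up):

fn fmaxp_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    // larger of each adjacent pair: a (Vn) feeds lane 0, b (Vm) feeds lane 1
    [a[0].max(a[1]), b[0].max(b[1])]
}

fn main() {
    assert_eq!(fmaxp_f32x2([1.5, -2.0], [0.0, 3.25]), [1.5, 3.25]);
}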

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
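
SMINP mirrors SMAXP with the smaller element of each signed pair kept; a compact scalar sketch (illustrative name, not a core_arch intrinsic):

fn sminp_i8x8(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut result = [0i8; 8];
    for e in 0..4 {
        // low half pairs up `a` (Vn), high half pairs up `b` (Vm)
        result[e] = a[2 * e].min(a[2 * e + 1]);
        result[e + 4] = b[2 * e].min(b[2 * e + 1]);
    }
    result
}

fn main() {
    let a = [1i8, -2, 3, 4, -5, -6, 7, 0];
    let b = [9i8, 8, -7, -6, 5, 4, -3, -2];
    assert_eq!(sminp_i8x8(a, b), [-2, 3, -6, 0, 8, -7, 4, -3]);
}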

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+
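
A similar sketch for the unsigned 8B form, assuming the usual vpmin_u8 mapping in core::arch::aarch64 (sample values are illustrative).

#[cfg(target_arch = "aarch64")]
fn uminp_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a_vals: [u8; 8] = [10, 2, 30, 4, 50, 6, 70, 8];
        let b_vals: [u8; 8] = [9, 9, 1, 200, 3, 3, 100, 5];
        let a = vld1_u8(a_vals.as_ptr());
        let b = vld1_u8(b_vals.as_ptr());
        // Comparison is unsigned: min(1, 200) == 1, with no sign reinterpretation.
        let r = vpmin_u8(a, b);
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r);
        assert_eq!(out, [2, 4, 6, 8, 9, 1, 3, 5]);
    }
}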

Supported architectures

v7/A32/A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+
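
A short sketch of the floating-point pairwise minimum, assuming it is exposed as vpmin_f32 in core::arch::aarch64 (illustrative values).

#[cfg(target_arch = "aarch64")]
fn fminp_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([1.0f32, -2.0].as_ptr()); // Vn.2S
        let b = vld1_f32([0.5f32, 4.0].as_ptr());  // Vm.2S
        // result = [min(1.0, -2.0), min(0.5, 4.0)]
        let r = vpmin_f32(a, b);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [-2.0f32, 0.5]);
    }
}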

Supported architectures

v7/A32/A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+
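
The "Number" variants differ from FMAXP/FMINP in how they treat NaN: a quiet NaN paired with a number yields the number. A hedged sketch, assuming core::arch::aarch64 exposes this form as vpmaxnm_f32 (sample values are illustrative).

#[cfg(target_arch = "aarch64")]
fn fmaxnmp_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([f32::NAN, 3.0].as_ptr());
        let b = vld1_f32([1.0f32, 2.0].as_ptr());
        // FPMaxNum returns the numeric operand when the other is a quiet NaN:
        // [FPMaxNum(NaN, 3.0), FPMaxNum(1.0, 2.0)] == [3.0, 2.0]
        let r = vpmaxnm_f32(a, b);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [3.0f32, 2.0]);
    }
}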

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+
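
Likewise for the minimum "Number" form, assuming a vpminnm_f32 mapping (illustrative values).

#[cfg(target_arch = "aarch64")]
fn fminnmp_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([5.0f32, f32::NAN].as_ptr());
        let b = vld1_f32([-1.0f32, 8.0].as_ptr());
        // [FPMinNum(5.0, NaN), FPMinNum(-1.0, 8.0)] == [5.0, -1.0]
        let r = vpminnm_f32(a, b);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [5.0f32, -1.0]);
    }
}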

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+
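
In this Dd form both halves of the pair come from the single source vector, so the instruction reduces the two doubleword lanes to one scalar. A sketch assuming the vpaddd_s64 mapping in core::arch::aarch64 (values are illustrative).

#[cfg(target_arch = "aarch64")]
fn addp_d_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1q_s64([40i64, 2].as_ptr()); // Vn.2D
        // Both 64-bit lanes of the single source are summed into a scalar D register.
        let sum: i64 = vpaddd_s64(v);
        assert_eq!(sum, 42);
    }
}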

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+
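
A sketch of the scalar Sd form, which adds the two single-precision lanes of one source vector; the vpadds_f32 mapping and the values are assumed for illustration.

#[cfg(target_arch = "aarch64")]
fn faddp_s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_f32([1.5f32, 2.25].as_ptr()); // Vn.2S
        // The two single-precision lanes are added into one scalar result.
        let sum: f32 = vpadds_f32(v);
        assert_eq!(sum, 3.75f32);
    }
}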

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+
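
A sketch of the scalar maximum form, assuming the vpmaxs_f32 mapping (illustrative values).

#[cfg(target_arch = "aarch64")]
fn fmaxp_s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_f32([-1.0f32, 4.0].as_ptr()); // Vn.2S
        // The larger of the two lanes is written to the scalar destination.
        let max: f32 = vpmaxs_f32(v);
        assert_eq!(max, 4.0f32);
    }
}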

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+
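
A sketch of the add-across reduction, assuming the usual vaddv_s8 mapping in core::arch::aarch64 (illustrative values).

#[cfg(target_arch = "aarch64")]
fn addv_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s8([1i8, 2, 3, 4, 5, 6, 7, 8].as_ptr()); // Vn.8B
        // All eight byte lanes are summed into a single byte-sized scalar
        // (the addition wraps modulo 2^8 if it overflows).
        let sum: i8 = vaddv_s8(v);
        assert_eq!(sum, 36);
    }
}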

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+
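
As the argument mapping above shows (the same input feeds Vn and Vm), the 2S add-across is lowered as an ADDP of a register with itself, and the scalar is read from lane 0. A sketch assuming the vaddv_s32 mapping; the values are illustrative.

#[cfg(target_arch = "aarch64")]
fn addv_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s32([40i32, 2].as_ptr());
        // Typically lowered as `ADDP Vd.2S, Vn.2S, Vn.2S`; lane 0 of Vd holds the sum.
        let sum: i32 = vaddv_s32(v);
        assert_eq!(sum, 42);
    }
}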

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vt.4S,Vn.4S,Vm.4S
+FADDP Sd,Vt.2S
+

Argument Preparation

a → Vn.4S 
+a → Vm.4S

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Hd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+
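
A sketch of the widening reduction, assuming the vaddlv_s8 mapping; the values are chosen to show that the i16 result can hold a sum that would overflow the i8 element type.

#[cfg(target_arch = "aarch64")]
fn saddlv_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s8([100i8, 100, 100, 100, 100, 100, 100, 100].as_ptr());
        // The sum (800) does not fit in an i8; the widened i16 result holds it.
        let sum: i16 = vaddlv_s8(v);
        assert_eq!(sum, 800);
    }
}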

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Hd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Sd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Sd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+
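
A sketch of the pairwise widening add, assuming the vpaddl_s32 mapping; the values are picked so that the 32-bit sum would wrap without widening.

#[cfg(target_arch = "aarch64")]
fn saddlp_1d_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s32([i32::MAX, 1].as_ptr()); // Vn.2S
        // The adjacent pair is widened before the add, so the sum cannot wrap.
        let r: int64x1_t = vpaddl_s32(v);
        let mut out = [0i64; 1];
        vst1_s64(out.as_mut_ptr(), r);
        assert_eq!(out[0], i64::from(i32::MAX) + 1);
    }
}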

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Dd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Hd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Hd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Sd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Sd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Dd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+
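
A sketch of the signed maximum reduction, assuming the vmaxv_s8 mapping (illustrative values).

#[cfg(target_arch = "aarch64")]
fn smaxv_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s8([-1i8, -2, 7, 3, 0, -128, 5, 6].as_ptr()); // Vn.8B
        // Signed comparison across all eight lanes.
        let max: i8 = vmaxv_s8(v);
        assert_eq!(max, 7);
    }
}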

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+
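
A scalar sketch of the pairwise form, keeping the concat = operand2:operand1 lane order from the pseudocode (the helper name is illustrative only):

fn smaxp_i32(a: [i32; 2], b: [i32; 2]) -> [i32; 2] {
    // operand1 (a) supplies the lowest-numbered lanes of the concatenation.
    let concat = [a[0], a[1], b[0], b[1]];
    [concat[0].max(concat[1]), concat[2].max(concat[3])]
}

fn main() {
    // The mapping above passes the same value for Vn and Vm and keeps lane 0,
    // which is then simply the larger of a's two lanes.
    let a = [3, 9];
    assert_eq!(smaxp_i32(a, a)[0], 9);
}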

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+
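
The unsigned variant follows the same shape as SMAXV; a brief sketch for the Bd,Vn.8B form (illustrative helper only):

fn umaxv_u8(lanes: [u8; 8]) -> u8 {
    // Identical loop to the signed case, but over unsigned lanes.
    let mut maxmin = lanes[0];
    for &element in &lanes[1..] {
        maxmin = maxmin.max(element);
    }
    maxmin
}

fn main() {
    assert_eq!(umaxv_u8([0, 200, 17, 255, 3, 4, 5, 6]), 255);
}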

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+
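
A rough scalar model of the Sd,Vn.2S form, under the assumption that neither lane is NaN (the FPMax rules for NaNs and signed zeros are not reproduced, and the helper name is made up):

fn fmaxp_f32(a: [f32; 2]) -> f32 {
    // Assumes NaN-free input; FPMax in the pseudocode has its own NaN and
    // signed-zero handling that f32::max does not match exactly.
    a[0].max(a[1])
}

fn main() {
    assert_eq!(fmaxp_f32([1.5, -2.0]), 1.5);
}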

Supported architectures

A64

Description

Floating-point Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMAX, operand, esize);
+
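
Assuming NaN-free input, the across-vector floating-point maximum reduces to a plain fold (sketch only, hypothetical helper name):

fn fmaxv_f32(lanes: [f32; 4]) -> f32 {
    // Reduce(ReduceOp_FMAX, ...) applies FPMax pairwise; a left fold gives the
    // same value when no lane is NaN.
    lanes.iter().copied().fold(lanes[0], f32::max)
}

fn main() {
    assert_eq!(fmaxv_f32([0.5, 4.25, -1.0, 2.0]), 4.25);
}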

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+
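
The minimum reductions mirror the maximum ones with Min selected; for completeness, a one-line sketch of the Bd,Vn.8B form (illustrative name):

fn sminv_i8(lanes: [i8; 8]) -> i8 {
    // Same reduction as SMAXV, with Min in place of Max.
    lanes.iter().copied().fold(lanes[0], |acc, e| acc.min(e))
}

fn main() {
    assert_eq!(sminv_i8([-128, -1, 0, 5, 3, -7, 127, 2]), -128);
}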

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMIN, operand, esize);
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+
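
The NM ("number") variants use FPMaxNum, which prefers the numeric operand when exactly one input is a quiet NaN. Rust's f32::max follows a similar ignore-NaN rule, so a scalar sketch is straightforward (helper name is made up; signaling-NaN corner cases are not modelled):

fn fmaxnmp_f32(a: [f32; 2]) -> f32 {
    // f32::max returns the non-NaN operand when the other is NaN, which is
    // close to FPMaxNum and unlike the plain FPMax used by FMAXP.
    a[0].max(a[1])
}

fn main() {
    assert_eq!(fmaxnmp_f32([f32::NAN, 2.5]), 2.5);
    assert_eq!(fmaxnmp_f32([1.0, -3.0]), 1.0);
}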

Supported architectures

A64

Description

Floating-point Maximum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMAXNUM, operand, esize);
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMINNUM, operand, esize);
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#n
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+
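
A byte-level sketch of EXT for the Vd.8B form: the result is the 8-byte window starting at byte position n of the 16-byte concatenation, with Vn in the low half (illustrative helper, no intrinsics):

fn ext_u8x8(a: [u8; 8], b: [u8; 8], n: usize) -> [u8; 8] {
    assert!(n < 8);
    // concat = hi:lo with lo = Vn (a) in the low bytes and hi = Vm (b) above it.
    let mut concat = [0u8; 16];
    concat[..8].copy_from_slice(&a);
    concat[8..].copy_from_slice(&b);
    let mut out = [0u8; 8];
    out.copy_from_slice(&concat[n..n + 8]);
    out
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [8, 9, 10, 11, 12, 13, 14, 15];
    // n = 3: the five highest bytes of a are followed by the three lowest of b.
    assert_eq!(ext_u8x8(a, b, 3), [3, 4, 5, 6, 7, 8, 9, 10]);
}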

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#n
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<1)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 3

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<1)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<2)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 1

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<2)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 3

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#n
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#n
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<1)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 3

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<1)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<2)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 1

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<2)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 3

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<2)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 1

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<2)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 3

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#n
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#n
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<1)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 3

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<1)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+
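
The 8B form above reverses its single 64-bit doubleword; the 16B form that follows does the same per doubleword. A sketch of the latter, since it shows the container structure (helper name is hypothetical):

fn rev64_u8x16(v: [u8; 16]) -> [u8; 16] {
    // Two 64-bit containers of eight 8-bit elements each: reverse the elements
    // inside each container while the containers keep their positions.
    let mut out = v;
    out[..8].reverse();
    out[8..].reverse();
    out
}

fn main() {
    let v: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    assert_eq!(
        rev64_u8x16(v),
        [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]
    );
}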

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.2S,Vn.2S
+

Argument Preparation

vec → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4S,Vn.4S
+

Argument Preparation

vec → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.2S,Vn.2S
+

Argument Preparation

vec → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4S,Vn.4S
+

Argument Preparation

vec → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.2S,Vn.2S
+

Argument Preparation

vec → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4S,Vn.4S
+

Argument Preparation

vec → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
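
REV32 runs the same loop with a 32-bit container, so for 8-bit elements each group of four bytes is reversed in place. A plain-Rust sketch for an 8-byte vector follows; the name rev32_8b is illustrative only.

// Sketch of REV32 on 8 x u8 lanes: reverse byte order inside each 32-bit word.
fn rev32_8b(operand: [u8; 8]) -> [u8; 8] {
    const EPC: usize = 4; // 32-bit container / 8-bit element
    let mut result = [0u8; 8];
    for (c, chunk) in operand.chunks_exact(EPC).enumerate() {
        for (e, &v) in chunk.iter().enumerate() {
            result[c * EPC + (EPC - 1 - e)] = v;
        }
    }
    result
}

fn main() {
    assert_eq!(rev32_8b([0, 1, 2, 3, 4, 5, 6, 7]), [3, 2, 1, 0, 7, 6, 5, 4]);
}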

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
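
With a 16-bit container the loop degenerates to swapping the two bytes inside each halfword. A plain-Rust sketch for an 8-byte vector follows; the name rev16_8b is made up for illustration.

// Sketch of REV16 on 8 x u8 lanes: swap the two bytes of each 16-bit halfword.
fn rev16_8b(operand: [u8; 8]) -> [u8; 8] {
    let mut result = operand;
    for pair in result.chunks_exact_mut(2) {
        pair.swap(0, 1);
    }
    result
}

fn main() {
    assert_eq!(rev16_8b([0, 1, 2, 3, 4, 5, 6, 7]), [1, 0, 3, 2, 5, 4, 7, 6]);
}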

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64
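
In the pseudocode above, ZIP1 corresponds to part = 0, so base = 0 and the interleaved pairs are drawn from the lower half of each source register. The following plain-Rust sketch models this for 8-bit elements in 8-byte vectors; the helper name zip1_8b is illustrative, not an API.

// Sketch of ZIP1 on 8 x u8 lanes: interleave the lower halves of `a` and `b`.
fn zip1_8b(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let pairs = a.len() / 2;
    let part = 0;            // primary zip reads the lower half
    let base = part * pairs; // = 0
    let mut result = [0u8; 8];
    for p in 0..pairs {
        result[2 * p] = a[base + p];
        result[2 * p + 1] = b[base + p];
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(zip1_8b(a, b), [0, 10, 1, 11, 2, 12, 3, 13]);
}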

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64
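
ZIP2 is the same loop with part = 1, so base = pairs and the lanes come from the upper half of each source register. A plain-Rust sketch for 8-bit elements in 8-byte vectors follows; zip2_8b is a made-up name used only for this example.

// Sketch of ZIP2 on 8 x u8 lanes: interleave the upper halves of `a` and `b`.
fn zip2_8b(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let pairs = a.len() / 2;
    let part = 1;            // secondary zip reads the upper half
    let base = part * pairs; // = pairs
    let mut result = [0u8; 8];
    for p in 0..pairs {
        result[2 * p] = a[base + p];
        result[2 * p + 1] = b[base + p];
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(zip2_8b(a, b), [4, 14, 5, 15, 6, 16, 7, 17]);
}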

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+
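Again only datasize and esize vary between the UZP1 arrangements below. A hypothetical scalar sketch of the Operation pseudocode in plain Rust (the uzp1_model name and slice signature are invented for illustration, not stdarch code; part is 0 for UZP1):

// Scalar model of the UZP1 Operation pseudocode above (illustration only).
// The two sources are conceptually concatenated (`zipped = operandh:operandl`)
// and, with `part == 0` for UZP1, the even-numbered elements are kept.
fn uzp1_model<T: Copy>(a: &[T], b: &[T]) -> Vec<T> {
    assert_eq!(a.len(), b.len());
    let part = 0;                 // 0 for UZP1, 1 for UZP2
    let zipped: Vec<T> = a.iter().chain(b.iter()).copied().collect();
    (0..a.len()).map(|e| zipped[2 * e + part]).collect()
}

fn main() {
    let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let b = [10u8, 11, 12, 13, 14, 15, 16, 17];
    // Even-numbered elements of a fill the lower half, those of b the upper half.
    assert_eq!(uzp1_model(&a, &b), [0, 2, 4, 6, 10, 12, 14, 16]);
}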

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+
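The UZP2 pseudocode differs from UZP1 only in that part is 1, so the odd-numbered elements of the concatenation are kept. A correspondingly small scalar sketch (again an invented uzp2_model helper for illustration, not stdarch code):

// Same model as sketched for UZP1, but with `part == 1` (odd-numbered elements).
fn uzp2_model<T: Copy>(a: &[T], b: &[T]) -> Vec<T> {
    assert_eq!(a.len(), b.len());
    let zipped: Vec<T> = a.iter().chain(b.iter()).copied().collect();
    (0..a.len()).map(|e| zipped[2 * e + 1]).collect()
}

fn main() {
    let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let b = [10u8, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(uzp2_model(&a, &b), [1, 3, 5, 7, 11, 13, 15, 17]);
}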

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+
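As with the other permute instructions, the TRN1 pseudocode is shared by all arrangements; part is 0 for TRN1 (and would be 1 for TRN2). A hypothetical plain-Rust sketch of the element shuffle (the trn1_model name and slice signature are illustrative only, not stdarch code):

// Scalar model of the TRN1 Operation pseudocode above (illustration only).
// With `part == 0`, even-numbered elements of both sources are interleaved
// pairwise: a[0], b[0], a[2], b[2], ...
fn trn1_model<T: Copy>(a: &[T], b: &[T]) -> Vec<T> {
    assert_eq!(a.len(), b.len());
    let pairs = a.len() / 2;
    let part = 0;                     // 0 for TRN1, 1 for TRN2
    let mut result = Vec::with_capacity(a.len());
    for p in 0..pairs {
        result.push(a[2 * p + part]); // Elem[operand1, 2*p+part]
        result.push(b[2 * p + part]); // Elem[operand2, 2*p+part]
    }
    result
}

fn main() {
    let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let b = [10u8, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(trn1_model(&a, &b), [0, 10, 2, 12, 4, 14, 6, 16]);
}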

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64
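
The TRN2 entries run the same loop with `part = 1`, so both sources are read at the odd-numbered positions (2*p + 1) instead. A matching scalar sketch of the difference (hypothetical, for illustration only):

// Scalar model of TRN2: identical to TRN1 except that both sources are
// read at the odd positions, i.e. `part` in the pseudocode is 1.
fn trn2<const N: usize>(a: [u32; N], b: [u32; N]) -> [u32; N] {
    let mut result = [0u32; N];
    for p in 0..N / 2 {
        result[2 * p] = a[2 * p + 1];
        result[2 * p + 1] = b[2 * p + 1];
    }
    result
}

fn main() {
    // Shown with 4 lanes for brevity; the 8B/16B/4H/8H/2S/4S/2D forms only
    // change the lane count and width.
    let a = [10, 11, 12, 13];
    let b = [20, 21, 22, 23];
    assert_eq!(trn2(a, b), [11, 21, 13, 23]);
}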

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → Zeros(64):a 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
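
In this form the Argument Preparation zero-extends a 64-bit value into the 16-byte table register (`Vn → Zeros(64):a`), so indices 8..=15 read the zeroed upper half and indices of 16 or more are out of range; either way the looked-up byte is 0. A scalar sketch of the single-register lookup (a hypothetical model of the pseudocode, not the stdarch code):

// Scalar model of TBL with one table register (regs = 1): `table` holds the
// 16 table bytes and `indices` the 8 index bytes; out-of-range indices give 0.
fn tbl1(table: [u8; 16], indices: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8]; // `result = Zeros()` because is_tbl is true
    for (i, &idx) in indices.iter().enumerate() {
        if (idx as usize) < table.len() {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    // Table = Zeros(64):a: data in the low 8 bytes, zeros in the high 8.
    let mut table = [0u8; 16];
    table[..8].copy_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
    assert_eq!(tbl1(table, [0, 7, 8, 15, 16, 255, 3, 2]), [1, 8, 0, 0, 0, 0, 4, 3]);
}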

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → Zeros(64):a 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → Zeros(64):a 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#8
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → Zeros(64):b
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
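
This entry lists an instruction sequence rather than a single instruction (the Operation block above covers only the final BIF step): MOVI materialises the per-byte threshold 8, CMHS builds a mask of index bytes that are >= 8, TBL performs the lookup into the zero-extended 16-byte table, and BIF then copies the original destination byte back wherever the mask is set. The net effect is a lookup with 8-byte-table semantics, where an index of 8 or more leaves the destination byte unchanged. A scalar sketch of that combined behaviour (an interpretation of the listed sequence, not an authoritative implementation):

// Scalar model of the MOVI #8 / CMHS / TBL / BIF sequence: look up into an
// 8-byte table, keeping the old destination byte for indices >= 8.
fn tbx_8byte_table(dest: [u8; 8], table: [u8; 8], indices: [u8; 8]) -> [u8; 8] {
    let mut result = dest;
    for (i, &idx) in indices.iter().enumerate() {
        // CMHS against #8 selects the in-range bytes; BIF restores `dest`
        // for the rest.
        if idx < 8 {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    let dest = [0xAA; 8];
    let table = [1, 2, 3, 4, 5, 6, 7, 8];
    // Indices 9, 8 and 200 are out of range, so the old 0xAA bytes survive.
    assert_eq!(tbx_8byte_table(dest, table, [0, 9, 7, 8, 2, 200, 1, 3]),
               [1, 0xAA, 8, 0xAA, 3, 0xAA, 2, 4]);
}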

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#8
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → Zeros(64):b
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#8
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → Zeros(64):b
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → Zeros(64):a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
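
With two table registers the pseudocode concatenates them into a 32-byte table (`table<128*i+127:128*i> = V[n]`, the register number wrapping modulo 32), so indices up to 16*regs - 1 = 31 are in range; in this particular form the second register is itself only half filled (`Zeros(64):a.val[2]`), so indices 24..=31 read zeros. A scalar sketch of the multi-register lookup (hypothetical model):

// Scalar model of TBL over `regs` consecutive table registers: the registers
// are concatenated lowest-bytes-first and indices >= 16*regs yield 0.
fn tbl_multi(regs: &[[u8; 16]], indices: [u8; 8]) -> [u8; 8] {
    let table: Vec<u8> = regs.iter().flatten().copied().collect();
    let mut result = [0u8; 8];
    for (i, &idx) in indices.iter().enumerate() {
        if (idx as usize) < table.len() {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    let mut v0 = [0u8; 16];
    let mut v1 = [0u8; 16];
    for b in 0..16 {
        v0[b] = b as u8;       // first register: bytes 0..=15 of the table
        v1[b] = 100 + b as u8; // second register: bytes 16..=31
    }
    // Index 3 hits v0, index 18 hits byte 2 of v1, index 32 is out of range.
    assert_eq!(tbl_multi(&[v0, v1], [3, 18, 32, 0, 0, 0, 0, 0]),
               [3, 102, 0, 0, 0, 0, 0, 0]);
}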

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → Zeros(64):a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → Zeros(64):a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → a.val[3]:a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → a.val[3]:a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → a.val[3]:a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
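
TBX differs from TBL only in the initial value of `result`: here `is_tbl` is false, so `result` starts as the previous contents of Vd and out-of-range indices leave those bytes untouched instead of zeroing them. A scalar sketch of that difference for the single-register form (hypothetical model):

// Scalar model of TBX with one table register: like TBL, but bytes whose
// index is out of range keep the previous destination value.
fn tbx1(dest: [u8; 8], table: [u8; 16], indices: [u8; 8]) -> [u8; 8] {
    let mut result = dest; // `result = V[d]` because is_tbl is false
    for (i, &idx) in indices.iter().enumerate() {
        if (idx as usize) < table.len() {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    let dest = [0xEE; 8];
    let mut table = [0u8; 16];
    for b in 0..16 {
        table[b] = b as u8 + 1;
    }
    // Index 16 is out of range, so the destination byte 0xEE survives there.
    assert_eq!(tbx1(dest, table, [0, 15, 16, 2, 0, 0, 0, 0]),
               [1, 16, 0xEE, 3, 1, 1, 1, 1]);
}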

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#24
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → Zeros(64):b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
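
This sequence reuses the MOVI/CMHS/TBL/BIF trick from the 8-byte case above, with the CMHS threshold raised to 24: the table is built from three 64-bit source values (`b.val[0]`, `b.val[1]` and `b.val[2]`, the upper half of Vn+1 being zero-filled), i.e. 24 usable bytes, so any index of 24 or more falls back to the original destination byte rather than reading the zeroed tail of Vn+1.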

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#24
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → Zeros(64):b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#24
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → Zeros(64):b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → b.val[3]:b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
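
For illustration only: here the four 64-bit table registers (b.val[0..3]) are packed into two 16-byte registers, so a plain two-register TBX covers the full 0..32 index range and no masking sequence is needed. A minimal Rust sketch, assuming this entry corresponds to the `vtbx4_u8` intrinsic in `core::arch::aarch64` (that mapping, the function name `tbx4_sketch`, and the example values are illustrative assumptions):

// Illustrative sketch only: four 64-bit table registers packed into two
// 16-byte registers; indices >= 32 leave the destination byte unchanged.
#[cfg(target_arch = "aarch64")]
unsafe fn tbx4_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];         // destination / fallback bytes
    let idx: [u8; 8] = [0, 31, 32, 3, 200, 16, 24, 9];   // 32 and 200 are out of range
    let mut tab = [0u8; 32];
    for i in 0..32 { tab[i] = 64 + i as u8; }

    let b = uint8x8x4_t(
        vld1_u8(tab.as_ptr()),
        vld1_u8(tab.as_ptr().add(8)),
        vld1_u8(tab.as_ptr().add(16)),
        vld1_u8(tab.as_ptr().add(24)),
    );
    let r = vtbx4_u8(vld1_u8(a.as_ptr()), b, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [64, 95, 3, 67, 5, 80, 88, 73]);
}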

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → b.val[3]:b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → b.val[3]:b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
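
For illustration only: a minimal Rust sketch of the single-register TBL form documented in the two entries above, assuming they correspond to `vqtbl1_u8` (8-byte index/result vector) and `vqtbl1q_u8` (16-byte variant) in `core::arch::aarch64`; the function name `qtbl1_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a single 16-byte table; indices >= 16 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl1_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 16];
    for i in 0..16 { tab[i] = 100 + i as u8; }
    let idx: [u8; 8] = [0, 3, 15, 16, 255, 7, 1, 2];   // 16 and 255 are out of range

    let t = vld1q_u8(tab.as_ptr());
    let r = vqtbl1_u8(t, vld1_u8(idx.as_ptr()));        // TBL Vd.8B,{Vn.16B},Vm.8B

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    // Out-of-range lanes come back as 0.
    assert_eq!(out, [100, 103, 115, 0, 0, 107, 101, 102]);
}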

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t → Vn.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t → Vn.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
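
For illustration only: the two entries above are the extension (TBX) counterparts of the single-register lookup, where out-of-range indices keep the existing destination byte instead of producing 0. A minimal Rust sketch, assuming they correspond to `vqtbx1_u8` / `vqtbx1q_u8` in `core::arch::aarch64`; the function name `qtbx1_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a single 16-byte table with extension semantics;
// indices >= 16 keep the corresponding byte of `a`.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbx1_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let idx: [u8; 8] = [0, 3, 15, 16, 255, 7, 1, 2];   // 16 and 255 are out of range
    let mut tab = [0u8; 16];
    for i in 0..16 { tab[i] = 100 + i as u8; }

    let r = vqtbx1_u8(vld1_u8(a.as_ptr()), vld1q_u8(tab.as_ptr()), vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 103, 115, 4, 5, 107, 101, 102]);
}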

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t → Vn.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t → Vn.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t → Vn.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t → Vn.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
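
For illustration only: a minimal Rust sketch of the two-register (32-byte table) TBL form in the entries above, assuming they correspond to `vqtbl2_u8` / `vqtbl2q_u8` in `core::arch::aarch64`; the function name `qtbl2_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 32-byte table held in a uint8x16x2_t; indices
// >= 32 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl2_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 32];
    for i in 0..32 { tab[i] = i as u8; }
    let idx: [u8; 16] = [0, 1, 2, 3, 16, 17, 30, 31, 32, 33, 255, 5, 6, 7, 8, 9];

    let t = uint8x16x2_t(vld1q_u8(tab.as_ptr()), vld1q_u8(tab.as_ptr().add(16)));
    let r = vqtbl2q_u8(t, vld1q_u8(idx.as_ptr()));   // TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B

    let mut out = [0u8; 16];
    vst1q_u8(out.as_mut_ptr(), r);
    // Lanes whose index is >= 32 (32, 33, 255) come back as 0.
    assert_eq!(out, [0, 1, 2, 3, 16, 17, 30, 31, 0, 0, 0, 5, 6, 7, 8, 9]);
}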

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
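
For illustration only: a minimal Rust sketch of the three-register (48-byte table) TBL form in the entries above, assuming they correspond to `vqtbl3_u8` / `vqtbl3q_u8` in `core::arch::aarch64`; the function name `qtbl3_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 48-byte table held in a uint8x16x3_t; indices
// >= 48 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl3_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 48];
    for i in 0..48 { tab[i] = 100 + i as u8; }
    let idx: [u8; 8] = [0, 16, 32, 47, 48, 255, 1, 2];   // 48 and 255 are out of range

    let t = uint8x16x3_t(
        vld1q_u8(tab.as_ptr()),
        vld1q_u8(tab.as_ptr().add(16)),
        vld1q_u8(tab.as_ptr().add(32)),
    );
    let r = vqtbl3_u8(t, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 116, 132, 147, 0, 0, 101, 102]);
}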

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
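
For illustration only: a minimal Rust sketch of the four-register (64-byte table, the maximum) TBL form in the entries above, assuming they correspond to `vqtbl4_u8` / `vqtbl4q_u8` in `core::arch::aarch64`; the function name `qtbl4_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 64-byte table held in a uint8x16x4_t; indices
// >= 64 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl4_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 64];
    for i in 0..64 { tab[i] = 100 + i as u8; }
    let idx: [u8; 16] = [0, 15, 16, 31, 32, 47, 48, 63, 64, 65, 255, 1, 2, 3, 4, 5];

    let t = uint8x16x4_t(
        vld1q_u8(tab.as_ptr()),
        vld1q_u8(tab.as_ptr().add(16)),
        vld1q_u8(tab.as_ptr().add(32)),
        vld1q_u8(tab.as_ptr().add(48)),
    );
    let r = vqtbl4q_u8(t, vld1q_u8(idx.as_ptr()));

    let mut out = [0u8; 16];
    vst1q_u8(out.as_mut_ptr(), r);
    // Lanes with index 64, 65 or 255 are zeroed; everything else reads the table.
    assert_eq!(out, [100, 115, 116, 131, 132, 147, 148, 163, 0, 0, 0, 101, 102, 103, 104, 105]);
}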

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
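
For illustration only: a minimal Rust sketch of the two-register (32-byte table) TBX form in the entries above, assuming they correspond to `vqtbx2_u8` / `vqtbx2q_u8` in `core::arch::aarch64`; the function name `qtbx2_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 32-byte table with extension semantics; indices
// >= 32 keep the corresponding byte of `a`.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbx2_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let idx: [u8; 8] = [0, 16, 31, 32, 255, 5, 6, 7];   // 32 and 255 are out of range
    let mut tab = [0u8; 32];
    for i in 0..32 { tab[i] = 100 + i as u8; }

    let t = uint8x16x2_t(vld1q_u8(tab.as_ptr()), vld1q_u8(tab.as_ptr().add(16)));
    let r = vqtbx2_u8(vld1_u8(a.as_ptr()), t, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 116, 131, 4, 5, 105, 106, 107]);
}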

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
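
For illustration only: a minimal Rust sketch of the three-register (48-byte table) TBX form in the entries above, assuming they correspond to `vqtbx3_u8` / `vqtbx3q_u8` in `core::arch::aarch64`; the function name `qtbx3_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 48-byte table with extension semantics; indices
// >= 48 keep the corresponding byte of `a`.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbx3_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let idx: [u8; 8] = [0, 16, 32, 47, 48, 255, 1, 2];   // 48 and 255 are out of range
    let mut tab = [0u8; 48];
    for i in 0..48 { tab[i] = 100 + i as u8; }

    let t = uint8x16x3_t(
        vld1q_u8(tab.as_ptr()),
        vld1q_u8(tab.as_ptr().add(16)),
        vld1q_u8(tab.as_ptr().add(32)),
    );
    let r = vqtbx3_u8(vld1_u8(a.as_ptr()), t, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 116, 132, 147, 5, 6, 101, 102]);
}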

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.8B 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64
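
To make the zero-extension concrete, here is a small plain-Rust model of the Operation block above for the byte form (the name umov_b and the array representation are assumptions for the example):

// Model of UMOV Rd, Vn.B[lane]: zero-extend byte `lane` of an 8-byte vector.
fn umov_b(v: [u8; 8], lane: usize) -> u64 {
    u64::from(v[lane]) // ZeroExtend(Elem[operand, index, 8], 64)
}

fn main() {
    let v = [0x80u8, 1, 2, 3, 4, 5, 6, 7];
    assert_eq!(umov_b(v, 0), 0x80); // the top bit is not propagated
}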

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.2S 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.8B 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64
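
The only difference from UMOV is the sign-extension step; a matching plain-Rust sketch of the byte form (names again invented for the example):

// Model of SMOV Rd, Vn.B[lane]: sign-extend byte `lane` of an 8-byte vector.
fn smov_b(v: [i8; 8], lane: usize) -> i64 {
    i64::from(v[lane]) // SignExtend(Elem[operand, index, 8], 64)
}

fn main() {
    let v = [-128i8, 1, 2, 3, 4, 5, 6, 7];
    assert_eq!(smov_b(v, 0), -128); // byte 0x80 becomes 0xFFFF_FFFF_FFFF_FF80
}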

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.2S 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.8B 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

v → Vn.2S 
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64
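
The Operation block above is a simple broadcast; a plain-Rust sketch of that loop, with the source value and lane count as parameters (the generic dup_f32 helper is made up for the example):

// Model of the DUP loop: copy one element into every lane of the result.
fn dup_f32<const N: usize>(element: f32) -> [f32; N] {
    [element; N]
}

fn main() {
    assert_eq!(dup_f32::<2>(1.5), [1.5, 1.5]); // e.g. a two-lane .2S vector
}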

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.16B 
+0 <= lane <= 15

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.4S 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.16B 
+0 <= lane <= 15

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.4S 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.16B 
+0 <= lane <= 15

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

v → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8B
+0 <= lane <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64
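
A plain-Rust sketch of the Operation block above: the destination is read, a single lane is overwritten, and the whole vector is written back (the helper name ins_b is invented for the example):

// Model of INS Vd.B[lane], Rn: overwrite one lane, keep the rest of Vd.
fn ins_b(dest: [u8; 8], lane: usize, value: u8) -> [u8; 8] {
    let mut result = dest;  // result = V[d]
    result[lane] = value;   // Elem[result, index, esize] = element
    result
}

fn main() {
    assert_eq!(ins_b([0; 8], 3, 0xFF), [0, 0, 0, 0xFF, 0, 0, 0, 0]);
}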

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8B
+0 <= lane <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8B
+0 <= lane <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Vn.H[0]
+

Argument Preparation

a → VnH 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Vn.H[0]
+

Argument Preparation

a → VnH 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.16B
+0 <= lane <= 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.16B
+0 <= lane <= 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.16B
+0 <= lane <= 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPX Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecpX(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
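
For normal (non-NaN, non-zero-exponent) inputs, FPRecpX keeps the sign, complements the exponent bits and clears the fraction; the sketch below models only that common case and deliberately omits the NaN and zero-exponent paths, so treat it as an approximation of the helper rather than a full implementation:

// Rough model of FRECPX for a normal f32 input: keep the sign, invert the
// exponent field, zero the fraction. Special-case handling is omitted.
fn frecpx_f32_normal(x: f32) -> f32 {
    let bits = x.to_bits();
    let sign = bits & 0x8000_0000;
    let exp = (bits >> 23) & 0xFF;
    f32::from_bits(sign | ((!exp & 0xFF) << 23))
}

fn main() {
    // 2.0 has exponent field 128; complementing gives 127, i.e. the value 1.0.
    assert_eq!(frecpx_f32_normal(2.0), 1.0);
}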

Description

Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPX Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecpX(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+n → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
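
The accumulation loop in the Operation block above maps to one fused multiply-add per lane (for the Vm.S[0] form, element2 is the single selected Vm lane for every element). A plain-Rust sketch for two lanes, using f32::mul_add for the single-rounding FPMulAdd step (the helper name fmla is made up for the example):

// Model of the FMLA loop: per lane, acc[e] + a[e] * b[e] with one rounding.
fn fmla(acc: [f32; 2], a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [a[0].mul_add(b[0], acc[0]), a[1].mul_add(b[1], acc[1])]
}

fn main() {
    assert_eq!(fmla([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), [16.0, 26.0]);
}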

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+n → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+n → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
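
FMLS follows the same loop with the product negated (the sub_op path in the pseudocode); a matching sketch (names invented for the example):

// Model of the FMLS loop: per lane, acc[e] - a[e] * b[e] with one rounding.
fn fmls(acc: [f32; 2], a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [(-a[0]).mul_add(b[0], acc[0]), (-a[1]).mul_add(b[1], acc[1])]
}

fn main() {
    assert_eq!(fmls([16.0, 26.0], [3.0, 4.0], [5.0, 6.0]), [1.0, 2.0]);
}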

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+n → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMADD Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+n → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
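
With the argument mapping above (a → Da, b → Dn, n → Dm), the scalar operation is Da + Dn * Dm with a single rounding; a one-line plain-Rust sketch using f64::mul_add (the helper name fmadd is an assumption for the example):

// Model of FMADD Dd, Dn, Dm, Da: one fused multiply-add, Da + Dn * Dm.
fn fmadd(a: f64, b: f64, n: f64) -> f64 {
    b.mul_add(n, a) // (b * n) + a with a single rounding
}

fn main() {
    assert_eq!(fmadd(1.0, 2.0, 3.0), 7.0);
}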

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.D[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+n → Vm.D[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMSUB Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+n → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+operand1 = FPNeg(operand1);
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
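
FMSUB negates the first multiplicand before the fused multiply-add, giving Da - Dn * Dm; a matching sketch (helper name invented for the example):

// Model of FMSUB Dd, Dn, Dm, Da: Da - Dn * Dm with a single rounding.
fn fmsub(a: f64, b: f64, n: f64) -> f64 {
    (-b).mul_add(n, a)
}

fn main() {
    assert_eq!(fmsub(7.0, 2.0, 3.0), 1.0);
}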

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.D[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+n → Vm.D[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.8B,Vn.8B,Vm.8B
+TRN2 Vd2.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd1.8B → result.val[0]
+Vd2.8B → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
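
The TRN1/TRN2 pair above differs only in the `part` value (0 selects even-numbered source lanes, 1 selects odd-numbered ones); a plain-Rust sketch of the pairing loop for eight one-byte lanes (the helper name trn is made up for the example):

// Model of the TRN1/TRN2 loop: interleave the selected lanes of a and b.
fn trn(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    let mut result = [0u8; 8];
    for p in 0..4 {
        result[2 * p] = a[2 * p + part];
        result[2 * p + 1] = b[2 * p + part];
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(trn(a, b, 0), [0, 10, 2, 12, 4, 14, 6, 16]); // TRN1
    assert_eq!(trn(a, b, 1), [1, 11, 3, 13, 5, 15, 7, 17]); // TRN2
}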

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.4H,Vn.4H,Vm.4H
+TRN2 Vd2.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd1.4H → result.val[0]
+Vd2.4H → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.8B,Vn.8B,Vm.8B
+TRN2 Vd2.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd1.8B → result.val[0]
+Vd2.8B → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.4H,Vn.4H,Vm.4H
+TRN2 Vd2.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd1.4H → result.val[0]
+Vd2.4H → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

The entries in this group differ only in the register arrangement; the description, the operation pseudocode and the supported architectures are common to all of them and are given once below.

A64 Instructions (one TRN1/TRN2 pair per entry, in the original order)

TRN1 Vd1.8B,Vn.8B,Vm.8B    / TRN2 Vd2.8B,Vn.8B,Vm.8B
TRN1 Vd1.4H,Vn.4H,Vm.4H    / TRN2 Vd2.4H,Vn.4H,Vm.4H
TRN1 Vd1.2S,Vn.2S,Vm.2S    / TRN2 Vd2.2S,Vn.2S,Vm.2S
TRN1 Vd1.2S,Vn.2S,Vm.2S    / TRN2 Vd2.2S,Vn.2S,Vm.2S
TRN1 Vd1.2S,Vn.2S,Vm.2S    / TRN2 Vd2.2S,Vn.2S,Vm.2S
TRN1 Vd1.16B,Vn.16B,Vm.16B / TRN2 Vd2.16B,Vn.16B,Vm.16B
TRN1 Vd1.8H,Vn.8H,Vm.8H    / TRN2 Vd2.8H,Vn.8H,Vm.8H
TRN1 Vd1.4S,Vn.4S,Vm.4S    / TRN2 Vd2.4S,Vn.4S,Vm.4S
TRN1 Vd1.4S,Vn.4S,Vm.4S    / TRN2 Vd2.4S,Vn.4S,Vm.4S
TRN1 Vd1.16B,Vn.16B,Vm.16B / TRN2 Vd2.16B,Vn.16B,Vm.16B
TRN1 Vd1.8H,Vn.8H,Vm.8H    / TRN2 Vd2.8H,Vn.8H,Vm.8H
TRN1 Vd1.4S,Vn.4S,Vm.4S    / TRN2 Vd2.4S,Vn.4S,Vm.4S
TRN1 Vd1.16B,Vn.16B,Vm.16B / TRN2 Vd2.16B,Vn.16B,Vm.16B
TRN1 Vd1.8H,Vn.8H,Vm.8H    / TRN2 Vd2.8H,Vn.8H,Vm.8H

Argument Preparation (per entry, using that entry's arrangement)

a → Vn.<arrangement>
b → Vm.<arrangement>

Results

Vd1.<arrangement> → result.val[0]
Vd2.<arrangement> → result.val[1]

Operation

CheckFPAdvSIMDEnabled64();
bits(datasize) operand1 = V[n];
bits(datasize) operand2 = V[m];
bits(datasize) result;

for p = 0 to pairs-1
    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];

V[d] = result;

Supported architectures

v7/A32/A64 (all entries)
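
The Operation pseudocode is the same for every entry; only the element size (esize) and the number of element pairs (pairs) change with the arrangement. As a minimal sketch, the plain Rust below models the TRN1/TRN2 element movement for the 8B arrangement (eight 8-bit lanes). The helper name trn and the use of ordinary arrays are illustrative only and are not part of stdarch; part plays the same role as in the pseudocode (0 selects TRN1, 1 selects TRN2).

// Illustrative model of the TRN1/TRN2 element movement described above,
// for the 8B arrangement. `part` is 0 for TRN1 and 1 for TRN2.
fn trn(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    let mut result = [0u8; 8];
    let pairs = a.len() / 2;
    for p in 0..pairs {
        result[2 * p] = a[2 * p + part];     // even destination elements come from the first source
        result[2 * p + 1] = b[2 * p + part]; // odd destination elements come from the second source
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(trn(a, b, 0), [0, 10, 2, 12, 4, 14, 6, 16]); // TRN1: even-numbered source elements
    assert_eq!(trn(a, b, 1), [1, 11, 3, 13, 5, 15, 7, 17]); // TRN2: odd-numbered source elements
    println!("trn ok");
}

Calling trn with part = 0 and part = 1 on the same pair of vectors yields the two halves listed under Results, i.e. result.val[0] and result.val[1].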

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

The entries in this group differ only in the register arrangement; the description, the operation pseudocode and the supported architectures are common to all of them and are given once below.

A64 Instructions (one ZIP1/ZIP2 pair per entry, in the original order)

ZIP1 Vd1.8B,Vn.8B,Vm.8B    / ZIP2 Vd2.8B,Vn.8B,Vm.8B
ZIP1 Vd1.4H,Vn.4H,Vm.4H    / ZIP2 Vd2.4H,Vn.4H,Vm.4H
ZIP1 Vd1.8B,Vn.8B,Vm.8B    / ZIP2 Vd2.8B,Vn.8B,Vm.8B
ZIP1 Vd1.4H,Vn.4H,Vm.4H    / ZIP2 Vd2.4H,Vn.4H,Vm.4H
ZIP1 Vd1.8B,Vn.8B,Vm.8B    / ZIP2 Vd2.8B,Vn.8B,Vm.8B
ZIP1 Vd1.4H,Vn.4H,Vm.4H    / ZIP2 Vd2.4H,Vn.4H,Vm.4H
ZIP1 Vd1.2S,Vn.2S,Vm.2S    / ZIP2 Vd2.2S,Vn.2S,Vm.2S
ZIP1 Vd1.2S,Vn.2S,Vm.2S    / ZIP2 Vd2.2S,Vn.2S,Vm.2S
ZIP1 Vd1.2S,Vn.2S,Vm.2S    / ZIP2 Vd2.2S,Vn.2S,Vm.2S
ZIP1 Vd1.16B,Vn.16B,Vm.16B / ZIP2 Vd2.16B,Vn.16B,Vm.16B
ZIP1 Vd1.8H,Vn.8H,Vm.8H    / ZIP2 Vd2.8H,Vn.8H,Vm.8H
ZIP1 Vd1.4S,Vn.4S,Vm.4S    / ZIP2 Vd2.4S,Vn.4S,Vm.4S
ZIP1 Vd1.4S,Vn.4S,Vm.4S    / ZIP2 Vd2.4S,Vn.4S,Vm.4S
ZIP1 Vd1.16B,Vn.16B,Vm.16B / ZIP2 Vd2.16B,Vn.16B,Vm.16B
ZIP1 Vd1.8H,Vn.8H,Vm.8H    / ZIP2 Vd2.8H,Vn.8H,Vm.8H
ZIP1 Vd1.4S,Vn.4S,Vm.4S    / ZIP2 Vd2.4S,Vn.4S,Vm.4S
ZIP1 Vd1.16B,Vn.16B,Vm.16B / ZIP2 Vd2.16B,Vn.16B,Vm.16B
ZIP1 Vd1.8H,Vn.8H,Vm.8H    / ZIP2 Vd2.8H,Vn.8H,Vm.8H

Argument Preparation (per entry, using that entry's arrangement)

a → Vn.<arrangement>
b → Vm.<arrangement>

Results

Vd1.<arrangement> → result.val[0]
Vd2.<arrangement> → result.val[1]

Operation

CheckFPAdvSIMDEnabled64();
bits(datasize) operand1 = V[n];
bits(datasize) operand2 = V[m];
bits(datasize) result;

integer base = part * pairs;

for p = 0 to pairs-1
    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];

V[d] = result;

Supported architectures

v7/A32/A64 (all entries)
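
As with the transpose group, the Operation pseudocode is shared by every entry. The plain Rust below is a minimal sketch of the ZIP1/ZIP2 element movement for the 8B arrangement; the helper name zip and the use of ordinary arrays are illustrative only and are not part of stdarch. As in the pseudocode, part selects which halves of the sources are interleaved (0 for ZIP1, 1 for ZIP2).

// Illustrative model of the ZIP1/ZIP2 element movement described above,
// for the 8B arrangement. `base = part * pairs` picks the lower or upper half.
fn zip(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    let mut result = [0u8; 8];
    let pairs = a.len() / 2;
    let base = part * pairs;
    for p in 0..pairs {
        result[2 * p] = a[base + p];     // element from the first source
        result[2 * p + 1] = b[base + p]; // interleaved with the second source
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(zip(a, b, 0), [0, 10, 1, 11, 2, 12, 3, 13]); // ZIP1 interleaves the lower halves
    assert_eq!(zip(a, b, 1), [4, 14, 5, 15, 6, 16, 7, 17]); // ZIP2 interleaves the upper halves
    println!("zip ok");
}

Running zip with part = 0 and part = 1 produces result.val[0] and result.val[1] from the Results section, i.e. the full interleaving of the two source vectors split across two destination registers.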

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

The entries in this group differ only in the register arrangement; the description, the operation pseudocode and the supported architectures are common to all of them and are given once below.

A64 Instructions (one UZP1/UZP2 pair per entry, in the original order)

UZP1 Vd1.8B,Vn.8B,Vm.8B    / UZP2 Vd2.8B,Vn.8B,Vm.8B
UZP1 Vd1.4H,Vn.4H,Vm.4H    / UZP2 Vd2.4H,Vn.4H,Vm.4H
UZP1 Vd1.2S,Vn.2S,Vm.2S    / UZP2 Vd2.2S,Vn.2S,Vm.2S
UZP1 Vd1.2S,Vn.2S,Vm.2S    / UZP2 Vd2.2S,Vn.2S,Vm.2S
UZP1 Vd1.8B,Vn.8B,Vm.8B    / UZP2 Vd2.8B,Vn.8B,Vm.8B
UZP1 Vd1.4H,Vn.4H,Vm.4H    / UZP2 Vd2.4H,Vn.4H,Vm.4H
UZP1 Vd1.2S,Vn.2S,Vm.2S    / UZP2 Vd2.2S,Vn.2S,Vm.2S
UZP1 Vd1.8B,Vn.8B,Vm.8B    / UZP2 Vd2.8B,Vn.8B,Vm.8B
UZP1 Vd1.4H,Vn.4H,Vm.4H    / UZP2 Vd2.4H,Vn.4H,Vm.4H
UZP1 Vd1.16B,Vn.16B,Vm.16B / UZP2 Vd2.16B,Vn.16B,Vm.16B
UZP1 Vd1.8H,Vn.8H,Vm.8H    / UZP2 Vd2.8H,Vn.8H,Vm.8H
UZP1 Vd1.4S,Vn.4S,Vm.4S    / UZP2 Vd2.4S,Vn.4S,Vm.4S
UZP1 Vd1.4S,Vn.4S,Vm.4S    / UZP2 Vd2.4S,Vn.4S,Vm.4S
UZP1 Vd1.16B,Vn.16B,Vm.16B / UZP2 Vd2.16B,Vn.16B,Vm.16B
UZP1 Vd1.8H,Vn.8H,Vm.8H    / UZP2 Vd2.8H,Vn.8H,Vm.8H
UZP1 Vd1.4S,Vn.4S,Vm.4S    / UZP2 Vd2.4S,Vn.4S,Vm.4S
UZP1 Vd1.16B,Vn.16B,Vm.16B / UZP2 Vd2.16B,Vn.16B,Vm.16B
UZP1 Vd1.8H,Vn.8H,Vm.8H    / UZP2 Vd2.8H,Vn.8H,Vm.8H

Argument Preparation (per entry, using that entry's arrangement)

a → Vn.<arrangement>
b → Vm.<arrangement>

Results

Vd1.<arrangement> → result.val[0]
Vd2.<arrangement> → result.val[1]

Operation

CheckFPAdvSIMDEnabled64();
bits(datasize) operandl = V[n];
bits(datasize) operandh = V[m];
bits(datasize) result;

bits(datasize*2) zipped = operandh:operandl;
for e = 0 to elements-1
    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];

V[d] = result;

Supported architectures

v7/A32/A64 (all entries)
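
The Operation pseudocode concatenates the second source on top of the first (zipped = operandh:operandl) and then takes every second element. The plain Rust below is a minimal sketch of that selection for the 8B arrangement; the helper name uzp and the use of ordinary arrays are illustrative only and are not part of stdarch, with part set to 0 for UZP1 (even-numbered elements) and 1 for UZP2 (odd-numbered elements).

// Illustrative model of the UZP1/UZP2 element selection described above,
// for the 8B arrangement.
fn uzp(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    // zipped = operandh:operandl, i.e. the second source concatenated above the first.
    let mut zipped = [0u8; 16];
    zipped[..8].copy_from_slice(&a);
    zipped[8..].copy_from_slice(&b);
    let mut result = [0u8; 8];
    for e in 0..result.len() {
        result[e] = zipped[2 * e + part]; // keep every second element of the concatenation
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(uzp(a, b, 0), [0, 2, 4, 6, 10, 12, 14, 16]); // UZP1: even-numbered elements
    assert_eq!(uzp(a, b, 1), [1, 3, 5, 7, 11, 13, 15, 17]); // UZP2: odd-numbered elements
    println!("uzp ok");
}

The two calls reproduce result.val[0] and result.val[1]: the first source contributes the lower half of each result and the second source the upper half, as stated in the description.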

Description

No description text is given for the entries in this group. Every entry lists NOP as its A64 instruction; the entries differ only in the register arrangement used to prepare the argument and to read back the result, so they are consolidated into the table below, one row per entry, in the original order.

Argument Preparation    Results             Supported architectures
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      A64
a → Vd.8B               Vd.1D → result      A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      A64
a → Vd.4H               Vd.1D → result      A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      A64
a → Vd.2S               Vd.1D → result      A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      A64
a → Vd.2S               Vd.1D → result      A32/A64
a → Vd.1D               Vd.1D → result      A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      A64
a → Vd.8B               Vd.1D → result      A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      A64
a → Vd.4H               Vd.1D → result      A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      A64
a → Vd.2S               Vd.1D → result      A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      A64
a → Vd.8B               Vd.1D → result      A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

Load SIMD&FP Register (register offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.

+

A64 Instruction

LDR Qd,[Xn]
+

Argument Preparation

ptr → Xn 

Results

Qd → result
+

Operation

+
bits(64) offset = ExtendReg(m, extend_type, shift);
+if HaveMTEExt() then
+    boolean is_load_store = memop IN {MemOp_STORE, MemOp_LOAD};
+    SetNotTagCheckedInstruction(is_load_store && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+bits(64) address;
+bits(datasize) data;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+address = address + offset;
+
+case memop of
+    when MemOp_STORE
+        data = V[t];
+        Mem[address, datasize DIV 8, AccType_VEC] = data;
+
+    when MemOp_LOAD
+        data = Mem[address, datasize DIV 8, AccType_VEC];
+        V[t] = data;
+

Supported architectures

A32/A64

Description

Store SIMD&FP register (register offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.

+

A64 Instruction

STR Qt,[Xn]
+

Argument Preparation

ptr → Xn 
+val → Qt

Results

void → result
+

Operation

+
bits(64) offset = ExtendReg(m, extend_type, shift);
+if HaveMTEExt() then
+    boolean is_load_store = memop IN {MemOp_STORE, MemOp_LOAD};
+    SetNotTagCheckedInstruction(is_load_store && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+bits(64) address;
+bits(datasize) data;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+address = address + offset;
+
+case memop of
+    when MemOp_STORE
+        data = V[t];
+        Mem[address, datasize DIV 8, AccType_VEC] = data;
+
+    when MemOp_LOAD
+        data = Mem[address, datasize DIV 8, AccType_VEC];
+        V[t] = data;
+

Supported architectures

A32/A64
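
Example (Rust, non-normative)

A hedged sketch of a whole-Q-register load and store from Rust. It uses the vld1q_u8/vst1q_u8 intrinsic names as stand-ins; the two entries above document the raw LDR/STR forms, and the exact intrinsic that maps to them is not shown here, so treat the names and the gating as assumptions.

// Round-trips 16 bytes through a Q register: LDR Qd,[Xn] then STR Qt,[Xn].
#[cfg(target_arch = "aarch64")]
unsafe fn copy_q_register(src: &[u8; 16], dst: &mut [u8; 16]) {
    use std::arch::aarch64::{vld1q_u8, vst1q_u8};
    let q = vld1q_u8(src.as_ptr()); // load into a 128-bit SIMD&FP register
    vst1q_u8(dst.as_mut_ptr(), q);  // store it back out
}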

Description

AES single round encryption.

+

A64 Instruction

AESE Vd.16B,Vn.16B
+

Argument Preparation

data → Vd.16B 
+key → Vn.16B

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+result = operand1 EOR operand2;
+result = AESSubBytes(AESShiftRows(result));
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

AES single round decryption.

+

A64 Instruction

AESD Vd.16B,Vn.16B
+

Argument Preparation

data → Vd.16B 
+key → Vn.16B

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+result = operand1 EOR operand2;
+result = AESInvSubBytes(AESInvShiftRows(result));
+V[d] = result;
+

Supported architectures

A32/A64

Description

AES mix columns.

+

A64 Instruction

AESMC Vd.16B,Vn.16B
+

Argument Preparation

data → Vn.16B 

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand = V[n];
+bits(128) result;
+result = AESMixColumns(operand);
+V[d] = result;
+

Supported architectures

A32/A64

Description

AES inverse mix columns.

+

A64 Instruction

AESIMC Vd.16B,Vn.16B
+

Argument Preparation

data → Vn.16B 

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand = V[n];
+bits(128) result;
+result = AESInvMixColumns(operand);
+V[d] = result;
+

Supported architectures

A32/A64
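
Example (Rust, non-normative)

The four AES entries above compose into full rounds: AESE followed by AESMC is one forward round (AddRoundKey, SubBytes, ShiftRows, MixColumns), and AESD followed by AESIMC is one inverse round. A minimal sketch, assuming the usual vaeseq_u8/vaesmcq_u8/vaesdq_u8/vaesimcq_u8 intrinsic names; the caller must ensure the crypto feature is actually available (for example via run-time detection), otherwise the calls are undefined behaviour.

// One forward and one inverse AES round built from the instructions above.
#[cfg(target_arch = "aarch64")]
unsafe fn aes_round_pair(
    state: std::arch::aarch64::uint8x16_t,
    round_key: std::arch::aarch64::uint8x16_t,
) -> (std::arch::aarch64::uint8x16_t, std::arch::aarch64::uint8x16_t) {
    use std::arch::aarch64::{vaesdq_u8, vaeseq_u8, vaesimcq_u8, vaesmcq_u8};
    let enc = vaesmcq_u8(vaeseq_u8(state, round_key));  // AESE + AESMC
    let dec = vaesimcq_u8(vaesdq_u8(state, round_key)); // AESD + AESIMC
    (enc, dec)
}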

Description

SHA1 hash update (choose).

+

A64 Instruction

SHA1C Qd,Sn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_e → Sn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) X = V[d];
+bits(32) Y = V[n];    // Note: 32 not 128 bits wide
+bits(128) W = V[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = SHAchoose(X<63:32>, X<95:64>, X<127:96>);
+    Y = Y + ROL(X<31:0>, 5) + t + Elem[W, e, 32];
+    X<63:32> = ROL(X<63:32>, 30);
+    <Y, X> = ROL(Y:X, 32);
+V[d] = X;
+

Supported architectures

A32/A64

Description

SHA1 hash update (parity).

+

A64 Instruction

SHA1P Qd,Sn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_e → Sn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) X = V[d];
+bits(32) Y = V[n];    // Note: 32 not 128 bits wide
+bits(128) W = V[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = SHAparity(X<63:32>, X<95:64>, X<127:96>);
+    Y = Y + ROL(X<31:0>, 5) + t + Elem[W, e, 32];
+    X<63:32> = ROL(X<63:32>, 30);
+    <Y, X> = ROL(Y:X, 32);
+V[d] = X;
+

Supported architectures

A32/A64

Description

SHA1 hash update (majority).

+

A64 Instruction

SHA1M Qd,Sn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_e → Sn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) X = V[d];
+bits(32) Y = V[n];    // Note: 32 not 128 bits wide
+bits(128) W = V[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = SHAmajority(X<63:32>, X<95:64>, X<127:96>);
+    Y = Y + ROL(X<31:0>, 5) + t + Elem[W, e, 32];
+    X<63:32> = ROL(X<63:32>, 30);
+    <Y, X> = ROL(Y:X, 32);
+V[d] = X;
+

Supported architectures

A32/A64

Description

SHA1 fixed rotate.

+

A64 Instruction

SHA1H Sd,Sn
+

Argument Preparation

hash_e → Sn 

Results

Sd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(32) operand = V[n];    // read element [0] only,  [1-3] zeroed
+V[d] = ROL(operand, 30);
+

Supported architectures

A32/A64
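
Example (Rust, non-normative)

SHA1C, SHA1P and SHA1M share one calling pattern: SHA1H rotates lane 0 of the current state to produce the e input for a later group of rounds, while the chosen update instruction advances a/b/c/d by four rounds. A sketch of the "choose" case, assuming the vsha1cq_u32, vsha1h_u32 and vgetq_lane_u32 intrinsic names with the const-generic lane signature.

// Four SHA-1 rounds of the choose phase; wk holds four message words
// with the round constant already added.
#[cfg(target_arch = "aarch64")]
unsafe fn sha1_choose_rounds(
    abcd: std::arch::aarch64::uint32x4_t,
    e: u32,
    wk: std::arch::aarch64::uint32x4_t,
) -> (std::arch::aarch64::uint32x4_t, u32) {
    use std::arch::aarch64::{vgetq_lane_u32, vsha1cq_u32, vsha1h_u32};
    let e_out = vsha1h_u32(vgetq_lane_u32::<0>(abcd)); // SHA1H: fixed rotate of lane 0
    let abcd_out = vsha1cq_u32(abcd, e, wk);           // SHA1C: choose update
    (abcd_out, e_out)
}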

Description

SHA1 schedule update 0.

+

A64 Instruction

SHA1SU0 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

w0_3 → Vd.4S 
+w4_7 → Vn.4S
+w8_11 → Vm.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) operand3 = V[m];
+bits(128) result;
+
+result = operand2<63:0>:operand1<127:64>;
+result = result EOR operand1 EOR operand3;
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA1 schedule update 1.

+

A64 Instruction

SHA1SU1 Vd.4S,Vn.4S
+

Argument Preparation

tw0_3 → Vd.4S 
+w12_15 → Vn.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+bits(128) T = operand1 EOR LSR(operand2, 32);
+result<31:0> = ROL(T<31:0>, 1);
+result<63:32> = ROL(T<63:32>, 1);
+result<95:64> = ROL(T<95:64>, 1);
+result<127:96> = ROL(T<127:96>, 1) EOR ROL(T<31:0>, 2);
+V[d] = result;
+

Supported architectures

A32/A64
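
Example (Rust, non-normative)

SHA1SU0 and SHA1SU1 are used as a pair to extend the SHA-1 message schedule by four words at a time. A sketch under the assumption that the intrinsics are exposed as vsha1su0q_u32 and vsha1su1q_u32.

// Computes W[t..t+4] from the previous sixteen schedule words.
#[cfg(target_arch = "aarch64")]
unsafe fn sha1_schedule_update(
    w0_3: std::arch::aarch64::uint32x4_t,
    w4_7: std::arch::aarch64::uint32x4_t,
    w8_11: std::arch::aarch64::uint32x4_t,
    w12_15: std::arch::aarch64::uint32x4_t,
) -> std::arch::aarch64::uint32x4_t {
    use std::arch::aarch64::{vsha1su0q_u32, vsha1su1q_u32};
    let partial = vsha1su0q_u32(w0_3, w4_7, w8_11); // SHA1SU0: partial update
    vsha1su1q_u32(partial, w12_15)                  // SHA1SU1: fold in the newest words
}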

Description

SHA256 hash update (part 1).

+

A64 Instruction

SHA256H Qd,Qn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_efgh → Qn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) result;
+result = SHA256hash(V[d], V[n], V[m], TRUE);
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA256 hash update (part 2).

+

A64 Instruction

SHA256H2 Qd,Qn,Vm.4S
+

Argument Preparation

hash_efgh → Qd 
+hash_abcd → Qn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) result;
+result = SHA256hash(V[n], V[d], V[m], FALSE);
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA256 schedule update 0.

+

A64 Instruction

SHA256SU0 Vd.4S,Vn.4S
+

Argument Preparation

w0_3 → Vd.4S 
+w4_7 → Vn.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+bits(128) T = operand2<31:0>:operand1<127:32>;
+bits(32) elt;
+
+for e = 0 to 3
+    elt = Elem[T, e, 32];
+    elt = ROR(elt, 7) EOR ROR(elt, 18) EOR LSR(elt, 3);
+    Elem[result, e, 32] = elt + Elem[operand1, e, 32];
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA256 schedule update 1.

+

A64 Instruction

SHA256SU1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

tw0_3 → Vd.4S 
+w8_11 → Vn.4S
+w12_15 → Vm.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) operand3 = V[m];
+bits(128) result;
+bits(128) T0 = operand3<31:0>:operand2<127:32>;
+bits(64) T1;
+bits(32) elt;
+
+T1 = operand3<127:64>;
+for e = 0 to 1
+    elt = Elem[T1, e, 32];
+    elt = ROR(elt, 17) EOR ROR(elt, 19) EOR LSR(elt, 10);
+    elt = elt + Elem[operand1, e, 32] + Elem[T0, e, 32];
+    Elem[result, e, 32] = elt;
+
+T1 = result<63:0>;
+for e = 2 to 3
+    elt = Elem[T1, e-2, 32];
+    elt = ROR(elt, 17) EOR ROR(elt, 19) EOR LSR(elt, 10);
+    elt = elt + Elem[operand1, e, 32] + Elem[T0, e, 32];
+    Elem[result, e, 32] = elt;
+
+V[d] = result;
+

Supported architectures

A32/A64
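
Example (Rust, non-normative)

The four SHA-256 entries split the work the same way as the SHA-1 ones: SHA256SU0/SHA256SU1 extend the message schedule and SHA256H/SHA256H2 advance the two state halves by four rounds. A sketch assuming the vsha256hq_u32/vsha256h2q_u32/vsha256su0q_u32/vsha256su1q_u32 and vaddq_u32 intrinsic names; k stands for the vector of round constants for these four rounds.

// One four-round step: extend the schedule, then update both state halves.
#[cfg(target_arch = "aarch64")]
unsafe fn sha256_four_rounds(
    abcd: std::arch::aarch64::uint32x4_t,
    efgh: std::arch::aarch64::uint32x4_t,
    w0_3: std::arch::aarch64::uint32x4_t,
    w4_7: std::arch::aarch64::uint32x4_t,
    w8_11: std::arch::aarch64::uint32x4_t,
    w12_15: std::arch::aarch64::uint32x4_t,
    k: std::arch::aarch64::uint32x4_t,
) -> (
    std::arch::aarch64::uint32x4_t,
    std::arch::aarch64::uint32x4_t,
    std::arch::aarch64::uint32x4_t,
) {
    use std::arch::aarch64::{
        vaddq_u32, vsha256h2q_u32, vsha256hq_u32, vsha256su0q_u32, vsha256su1q_u32,
    };
    // SHA256SU0 + SHA256SU1: W[16..20] from the previous sixteen words.
    let w16_19 = vsha256su1q_u32(vsha256su0q_u32(w0_3, w4_7), w8_11, w12_15);
    // SHA256H/SHA256H2: fold W[0..4] + K into the state.
    let wk = vaddq_u32(w0_3, k);
    let abcd_out = vsha256hq_u32(abcd, efgh, wk);
    let efgh_out = vsha256h2q_u32(efgh, abcd, wk);
    (abcd_out, efgh_out, w16_19)
}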

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL Vd.1Q,Vn.1D,Vm.1D
+

Argument Preparation

a → Vn.1D 
+b → Vm.1D

Results

Vd.1Q → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL2 Vd.1Q,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.1Q → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

A32/A64
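
Example (Rust, non-normative)

PMULL and PMULL2 perform a 64 x 64 -> 128-bit carry-less (polynomial) multiplication, the primitive behind GHASH/GCM and several CRC folding schemes; PMULL2 simply takes its operands from the upper halves of the source vectors. A sketch assuming the vmull_p64 intrinsic and that the p64/p128 polynomial types are plain 64-bit and 128-bit carriers.

// Carry-less multiply of two 64-bit values (requires the pmull feature).
#[cfg(target_arch = "aarch64")]
unsafe fn clmul_u64(a: u64, b: u64) -> u128 {
    use std::arch::aarch64::vmull_p64;
    vmull_p64(a, b) as u128
}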

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32B Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32H Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32W Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32X Wd,Wn,Xm
+

Argument Preparation

a → Wn 
+b → Xm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64
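
Example (Rust, non-normative)

The four widths above are usually combined: wide chunks go through CRC32X and any tail bytes through CRC32B. A sketch over a byte slice, assuming the __crc32d/__crc32b intrinsic names and that the caller has already verified the crc feature (the calls are undefined behaviour without it).

// CRC-32 (polynomial 0x04C11DB7) over a byte slice, eight bytes at a time.
#[cfg(target_arch = "aarch64")]
unsafe fn crc32_update(mut crc: u32, data: &[u8]) -> u32 {
    use std::arch::aarch64::{__crc32b, __crc32d};
    let mut chunks = data.chunks_exact(8);
    for chunk in &mut chunks {
        let mut word = [0u8; 8];
        word.copy_from_slice(chunk);
        crc = __crc32d(crc, u64::from_le_bytes(word)); // CRC32X
    }
    for &byte in chunks.remainder() {
        crc = __crc32b(crc, byte); // CRC32B
    }
    crc
}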

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

+

A64 Instruction

CRC32CB Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x1EDC6F41<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

+

A64 Instruction

CRC32CH Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x1EDC6F41<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32C checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

A64 Instruction

CRC32CW Wd,Wn,Wm

Argument Preparation

a → Wn
b → Wm

Results

Wd → result

Operation

bits(32) acc = X[n];    // accumulator
bits(size) val = X[m];    // input value
bits(32) poly = 0x1EDC6F41<31:0>;

bits(32+size) tempacc = BitReverse(acc):Zeros(size);
bits(size+32) tempval = BitReverse(val):Zeros(32);

// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));

Supported architectures

A32/A64

Description

CRC32C checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

A64 Instruction

CRC32CX Wd,Wn,Xm

Argument Preparation

a → Wn
b → Xm

Results

Wd → result

Operation

bits(32) acc = X[n];    // accumulator
bits(size) val = X[m];    // input value
bits(32) poly = 0x1EDC6F41<31:0>;

bits(32+size) tempacc = BitReverse(acc):Zeros(size);
bits(size+32) tempval = BitReverse(val):Zeros(32);

// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));

Supported architectures

A32/A64
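
Where these instructions are exposed as compiler intrinsics, callers typically guard the hardware path with run-time feature detection and keep a portable fallback. The sketch below assumes an `__crc32cw` intrinsic in `core::arch::aarch64` and that the `is_aarch64_feature_detected!` macro is in scope with a "crc" feature string; both are assumptions for illustration, not something this documentation guarantees.

#[cfg(target_arch = "aarch64")]
fn crc32c_word(acc: u32, val: u32) -> u32 {
    // Assumed intrinsic and feature name; verify against the actual API.
    if is_aarch64_feature_detected!("crc") {
        unsafe { core::arch::aarch64::__crc32cw(acc, val) }
    } else {
        // Portable fallback mirroring the Operation pseudocode: 32 division
        // steps with the reflected Castagnoli polynomial.
        let mut acc = acc ^ val;
        for _ in 0..32 {
            acc = if acc & 1 != 0 { (acc >> 1) ^ 0x82F6_3B78 } else { acc >> 1 };
        }
        acc
    }
}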

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/library/stdarch/crates/stdarch-verify/build.rs b/library/stdarch/crates/stdarch-verify/build.rs new file mode 100644 index 00000000000..c0dc81b6a61 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/build.rs @@ -0,0 +1,28 @@ +use std::path::Path; + +fn main() { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let root = dir.parent().unwrap(); + eprintln!("root: {}", root.display()); + walk(&root.join("core_arch/src/x86")); + walk(&root.join("core_arch/src/x86_64")); + walk(&root.join("core_arch/src/arm")); + walk(&root.join("core_arch/src/aarch64")); +} + +fn walk(root: &Path) { + for file in root.read_dir().unwrap() { + eprintln!("root: {}", root.display()); + let file = file.unwrap(); + if file.file_type().unwrap().is_dir() { + walk(&file.path()); + continue; + } + let path = file.path(); + if path.extension().and_then(|s| s.to_str()) != Some("rs") { + continue; + } + + println!("cargo:rerun-if-changed={}", path.display()); + } +} diff --git a/library/stdarch/crates/stdarch-verify/mips-msa.h b/library/stdarch/crates/stdarch-verify/mips-msa.h new file mode 100644 index 00000000000..881f1918f6b --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/mips-msa.h @@ -0,0 +1,707 @@ +v16i8 __builtin_msa_add_a_b (v16i8, v16i8); +v8i16 __builtin_msa_add_a_h (v8i16, v8i16); +v4i32 __builtin_msa_add_a_w (v4i32, v4i32); +v2i64 __builtin_msa_add_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_a_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_a_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_a_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_s_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_s_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_s_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_adds_u_b (v16u8, v16u8); +v8u16 __builtin_msa_adds_u_h (v8u16, v8u16); +v4u32 __builtin_msa_adds_u_w (v4u32, v4u32); +v2u64 __builtin_msa_adds_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_addv_b (v16i8, v16i8); +v8i16 __builtin_msa_addv_h (v8i16, v8i16); +v4i32 __builtin_msa_addv_w (v4i32, v4i32); +v2i64 __builtin_msa_addv_d (v2i64, v2i64); + +v16i8 __builtin_msa_addvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_addvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_addvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_addvi_d (v2i64, imm0_31); + +v16u8 __builtin_msa_and_v (v16u8, v16u8); + +v16u8 __builtin_msa_andi_b (v16u8, imm0_255); + +v16i8 __builtin_msa_asub_s_b (v16i8, v16i8); +v8i16 __builtin_msa_asub_s_h (v8i16, v8i16); +v4i32 __builtin_msa_asub_s_w (v4i32, v4i32); +v2i64 __builtin_msa_asub_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_asub_u_b (v16u8, v16u8); +v8u16 __builtin_msa_asub_u_h (v8u16, v8u16); +v4u32 __builtin_msa_asub_u_w (v4u32, v4u32); +v2u64 __builtin_msa_asub_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_ave_s_b (v16i8, v16i8); +v8i16 __builtin_msa_ave_s_h (v8i16, v8i16); +v4i32 __builtin_msa_ave_s_w (v4i32, v4i32); +v2i64 __builtin_msa_ave_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_ave_u_b (v16u8, v16u8); +v8u16 __builtin_msa_ave_u_h (v8u16, v8u16); +v4u32 __builtin_msa_ave_u_w (v4u32, v4u32); +v2u64 __builtin_msa_ave_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_aver_s_b (v16i8, v16i8); +v8i16 __builtin_msa_aver_s_h (v8i16, v8i16); +v4i32 __builtin_msa_aver_s_w (v4i32, v4i32); +v2i64 __builtin_msa_aver_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_aver_u_b (v16u8, v16u8); +v8u16 __builtin_msa_aver_u_h 
(v8u16, v8u16); +v4u32 __builtin_msa_aver_u_w (v4u32, v4u32); +v2u64 __builtin_msa_aver_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclr_b (v16u8, v16u8); +v8u16 __builtin_msa_bclr_h (v8u16, v8u16); +v4u32 __builtin_msa_bclr_w (v4u32, v4u32); +v2u64 __builtin_msa_bclr_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclri_b (v16u8, imm0_7); +v8u16 __builtin_msa_bclri_h (v8u16, imm0_15); +v4u32 __builtin_msa_bclri_w (v4u32, imm0_31); +v2u64 __builtin_msa_bclri_d (v2u64, imm0_63); + +v16u8 __builtin_msa_binsl_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsl_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsl_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsl_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsli_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsli_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsli_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsli_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_binsr_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsr_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsr_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsr_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsri_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsri_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsri_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsri_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_bmnz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmnzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bmz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bneg_b (v16u8, v16u8); +v8u16 __builtin_msa_bneg_h (v8u16, v8u16); +v4u32 __builtin_msa_bneg_w (v4u32, v4u32); +v2u64 __builtin_msa_bneg_d (v2u64, v2u64); + +v16u8 __builtin_msa_bnegi_b (v16u8, imm0_7); +v8u16 __builtin_msa_bnegi_h (v8u16, imm0_15); +v4u32 __builtin_msa_bnegi_w (v4u32, imm0_31); +v2u64 __builtin_msa_bnegi_d (v2u64, imm0_63); + +i32 __builtin_msa_bnz_b (v16u8); +i32 __builtin_msa_bnz_h (v8u16); +i32 __builtin_msa_bnz_w (v4u32); +i32 __builtin_msa_bnz_d (v2u64); + +i32 __builtin_msa_bnz_v (v16u8); + +v16u8 __builtin_msa_bsel_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bseli_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bset_b (v16u8, v16u8); +v8u16 __builtin_msa_bset_h (v8u16, v8u16); +v4u32 __builtin_msa_bset_w (v4u32, v4u32); +v2u64 __builtin_msa_bset_d (v2u64, v2u64); + +v16u8 __builtin_msa_bseti_b (v16u8, imm0_7); +v8u16 __builtin_msa_bseti_h (v8u16, imm0_15); +v4u32 __builtin_msa_bseti_w (v4u32, imm0_31); +v2u64 __builtin_msa_bseti_d (v2u64, imm0_63); + +i32 __builtin_msa_bz_b (v16u8); +i32 __builtin_msa_bz_h (v8u16); +i32 __builtin_msa_bz_w (v4u32); +i32 __builtin_msa_bz_d (v2u64); + +i32 __builtin_msa_bz_v (v16u8); + +v16i8 __builtin_msa_ceq_b (v16i8, v16i8); +v8i16 __builtin_msa_ceq_h (v8i16, v8i16); +v4i32 __builtin_msa_ceq_w (v4i32, v4i32); +v2i64 __builtin_msa_ceq_d (v2i64, v2i64); + +v16i8 __builtin_msa_ceqi_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_ceqi_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_ceqi_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_ceqi_d (v2i64, imm_n16_15); + +i32 __builtin_msa_cfcmsa (imm0_31); + +v16i8 __builtin_msa_cle_s_b (v16i8, v16i8); +v8i16 __builtin_msa_cle_s_h (v8i16, v8i16); +v4i32 __builtin_msa_cle_s_w (v4i32, v4i32); +v2i64 __builtin_msa_cle_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_cle_u_b (v16u8, v16u8); +v8i16 __builtin_msa_cle_u_h (v8u16, v8u16); +v4i32 __builtin_msa_cle_u_w (v4u32, v4u32); +v2i64 __builtin_msa_cle_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clei_s_b 
(v16i8, imm_n16_15); +v8i16 __builtin_msa_clei_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clei_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clei_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clei_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clei_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clei_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clei_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_clt_s_b (v16i8, v16i8); +v8i16 __builtin_msa_clt_s_h (v8i16, v8i16); +v4i32 __builtin_msa_clt_s_w (v4i32, v4i32); +v2i64 __builtin_msa_clt_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_clt_u_b (v16u8, v16u8); +v8i16 __builtin_msa_clt_u_h (v8u16, v8u16); +v4i32 __builtin_msa_clt_u_w (v4u32, v4u32); +v2i64 __builtin_msa_clt_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clti_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_clti_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clti_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clti_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clti_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clti_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clti_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clti_u_d (v2u64, imm0_31); + +i32 __builtin_msa_copy_s_b (v16i8, imm0_15); +i32 __builtin_msa_copy_s_h (v8i16, imm0_7); +i32 __builtin_msa_copy_s_w (v4i32, imm0_3); +i64 __builtin_msa_copy_s_d (v2i64, imm0_1); + +u32 __builtin_msa_copy_u_b (v16i8, imm0_15); +u32 __builtin_msa_copy_u_h (v8i16, imm0_7); +u32 __builtin_msa_copy_u_w (v4i32, imm0_3); +u64 __builtin_msa_copy_u_d (v2i64, imm0_1); + +void __builtin_msa_ctcmsa (imm0_31, i32); + +v16i8 __builtin_msa_div_s_b (v16i8, v16i8); +v8i16 __builtin_msa_div_s_h (v8i16, v8i16); +v4i32 __builtin_msa_div_s_w (v4i32, v4i32); +v2i64 __builtin_msa_div_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_div_u_b (v16u8, v16u8); +v8u16 __builtin_msa_div_u_h (v8u16, v8u16); +v4u32 __builtin_msa_div_u_w (v4u32, v4u32); +v2u64 __builtin_msa_div_u_d (v2u64, v2u64); + +v8i16 __builtin_msa_dotp_s_h (v16i8, v16i8); +v4i32 __builtin_msa_dotp_s_w (v8i16, v8i16); +v2i64 __builtin_msa_dotp_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_dotp_u_h (v16u8, v16u8); +v4u32 __builtin_msa_dotp_u_w (v8u16, v8u16); +v2u64 __builtin_msa_dotp_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_dpadd_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpadd_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpadd_s_d (v2i64, v4i32, v4i32); + +v8u16 __builtin_msa_dpadd_u_h (v8u16, v16u8, v16u8); +v4u32 __builtin_msa_dpadd_u_w (v4u32, v8u16, v8u16); +v2u64 __builtin_msa_dpadd_u_d (v2u64, v4u32, v4u32); + +v8i16 __builtin_msa_dpsub_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpsub_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpsub_s_d (v2i64, v4i32, v4i32); + +v8i16 __builtin_msa_dpsub_u_h (v8i16, v16u8, v16u8); +v4i32 __builtin_msa_dpsub_u_w (v4i32, v8u16, v8u16); +v2i64 __builtin_msa_dpsub_u_d (v2i64, v4u32, v4u32); + +v4f32 __builtin_msa_fadd_w (v4f32, v4f32); +v2f64 __builtin_msa_fadd_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fcaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fceq_w (v4f32, v4f32); +v2i64 __builtin_msa_fceq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclass_w (v4f32); +v2i64 __builtin_msa_fclass_d (v2f64); + +v4i32 __builtin_msa_fcle_w (v4f32, v4f32); +v2i64 __builtin_msa_fcle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclt_w (v4f32, v4f32); +v2i64 __builtin_msa_fclt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcne_w (v4f32, v4f32); +v2i64 __builtin_msa_fcne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcor_w (v4f32, v4f32); +v2i64 __builtin_msa_fcor_d (v2f64, v2f64); + 
+v4i32 __builtin_msa_fcueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fcueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcule_w (v4f32, v4f32); +v2i64 __builtin_msa_fcule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcult_w (v4f32, v4f32); +v2i64 __builtin_msa_fcult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcun_w (v4f32, v4f32); +v2i64 __builtin_msa_fcun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcune_w (v4f32, v4f32); +v2i64 __builtin_msa_fcune_d (v2f64, v2f64); + +v4f32 __builtin_msa_fdiv_w (v4f32, v4f32); +v2f64 __builtin_msa_fdiv_d (v2f64, v2f64); + +v8i16 __builtin_msa_fexdo_h (v4f32, v4f32); +v4f32 __builtin_msa_fexdo_w (v2f64, v2f64); + +v4f32 __builtin_msa_fexp2_w (v4f32, v4i32); +v2f64 __builtin_msa_fexp2_d (v2f64, v2i64); + +v4f32 __builtin_msa_fexupl_w (v8i16); +v2f64 __builtin_msa_fexupl_d (v4f32); + +v4f32 __builtin_msa_fexupr_w (v8i16); +v2f64 __builtin_msa_fexupr_d (v4f32); + +v4f32 __builtin_msa_ffint_s_w (v4i32); +v2f64 __builtin_msa_ffint_s_d (v2i64); + +v4f32 __builtin_msa_ffint_u_w (v4u32); +v2f64 __builtin_msa_ffint_u_d (v2u64); + +v4f32 __builtin_msa_ffql_w (v8i16); +v2f64 __builtin_msa_ffql_d (v4i32); + +v4f32 __builtin_msa_ffqr_w (v8i16); +v2f64 __builtin_msa_ffqr_d (v4i32); + +v16i8 __builtin_msa_fill_b (i32); +v8i16 __builtin_msa_fill_h (i32); +v4i32 __builtin_msa_fill_w (i32); +v2i64 __builtin_msa_fill_d (i64); + +v4f32 __builtin_msa_flog2_w (v4f32); +v2f64 __builtin_msa_flog2_d (v2f64); + +v4f32 __builtin_msa_fmadd_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmadd_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmax_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmax_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmsub_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmsub_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmul_w (v4f32, v4f32); +v2f64 __builtin_msa_fmul_d (v2f64, v2f64); + +v4f32 __builtin_msa_frint_w (v4f32); +v2f64 __builtin_msa_frint_d (v2f64); + +v4f32 __builtin_msa_frcp_w (v4f32); +v2f64 __builtin_msa_frcp_d (v2f64); + +v4f32 __builtin_msa_frsqrt_w (v4f32); +v2f64 __builtin_msa_frsqrt_d (v2f64); + +v4i32 __builtin_msa_fsaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fsaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fseq_w (v4f32, v4f32); +v2i64 __builtin_msa_fseq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsle_w (v4f32, v4f32); +v2i64 __builtin_msa_fsle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fslt_w (v4f32, v4f32); +v2i64 __builtin_msa_fslt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsne_w (v4f32, v4f32); +v2i64 __builtin_msa_fsne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsor_w (v4f32, v4f32); +v2i64 __builtin_msa_fsor_d (v2f64, v2f64); + +v4f32 __builtin_msa_fsqrt_w (v4f32); +v2f64 __builtin_msa_fsqrt_d (v2f64); + +v4f32 __builtin_msa_fsub_w (v4f32, v4f32); +v2f64 __builtin_msa_fsub_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fsueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsule_w (v4f32, v4f32); +v2i64 __builtin_msa_fsule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsult_w (v4f32, v4f32); +v2i64 __builtin_msa_fsult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsun_w (v4f32, v4f32); +v2i64 __builtin_msa_fsun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsune_w (v4f32, v4f32); +v2i64 __builtin_msa_fsune_d (v2f64, v2f64); + +v4i32 __builtin_msa_ftint_s_w (v4f32); +v2i64 
__builtin_msa_ftint_s_d (v2f64); + +v4u32 __builtin_msa_ftint_u_w (v4f32); +v2u64 __builtin_msa_ftint_u_d (v2f64); + +v8i16 __builtin_msa_ftq_h (v4f32, v4f32); +v4i32 __builtin_msa_ftq_w (v2f64, v2f64); + +v4i32 __builtin_msa_ftrunc_s_w (v4f32); +v2i64 __builtin_msa_ftrunc_s_d (v2f64); + +v4u32 __builtin_msa_ftrunc_u_w (v4f32); +v2u64 __builtin_msa_ftrunc_u_d (v2f64); + +v8i16 __builtin_msa_hadd_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hadd_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hadd_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_hadd_u_h (v16u8, v16u8); +v4u32 __builtin_msa_hadd_u_w (v8u16, v8u16); +v2u64 __builtin_msa_hadd_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_hsub_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hsub_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hsub_s_d (v4i32, v4i32); + +v8i16 __builtin_msa_hsub_u_h (v16u8, v16u8); +v4i32 __builtin_msa_hsub_u_w (v8u16, v8u16); +v2i64 __builtin_msa_hsub_u_d (v4u32, v4u32); + +v16i8 __builtin_msa_ilvev_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvev_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvev_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvev_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvl_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvl_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvl_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvl_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvod_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvod_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvod_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvod_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvr_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvr_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvr_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvr_d (v2i64, v2i64); + +v16i8 __builtin_msa_insert_b (v16i8, imm0_15, i32); +v8i16 __builtin_msa_insert_h (v8i16, imm0_7, i32); +v4i32 __builtin_msa_insert_w (v4i32, imm0_3, i32); +v2i64 __builtin_msa_insert_d (v2i64, imm0_1, i64); + +v16i8 __builtin_msa_insve_b (v16i8, imm0_15, v16i8); +v8i16 __builtin_msa_insve_h (v8i16, imm0_7, v8i16); +v4i32 __builtin_msa_insve_w (v4i32, imm0_3, v4i32); +v2i64 __builtin_msa_insve_d (v2i64, imm0_1, v2i64); + +v16i8 __builtin_msa_ld_b (void *, imm_n512_511); +v8i16 __builtin_msa_ld_h (void *, imm_n1024_1022); +v4i32 __builtin_msa_ld_w (void *, imm_n2048_2044); +v2i64 __builtin_msa_ld_d (void *, imm_n4096_4088); + +v16i8 __builtin_msa_ldi_b (imm_n512_511); +v8i16 __builtin_msa_ldi_h (imm_n512_511); +v4i32 __builtin_msa_ldi_w (imm_n512_511); +v2i64 __builtin_msa_ldi_d (imm_n512_511); + +v8i16 __builtin_msa_madd_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_madd_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_maddr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_maddv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_maddv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_maddv_d (v2i64, v2i64, v2i64); + +v16i8 __builtin_msa_max_a_b (v16i8, v16i8); +v8i16 __builtin_msa_max_a_h (v8i16, v8i16); +v4i32 __builtin_msa_max_a_w (v4i32, v4i32); +v2i64 __builtin_msa_max_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_max_s_b (v16i8, v16i8); +v8i16 __builtin_msa_max_s_h (v8i16, v8i16); +v4i32 __builtin_msa_max_s_w (v4i32, v4i32); +v2i64 __builtin_msa_max_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_max_u_b (v16u8, v16u8); +v8u16 __builtin_msa_max_u_h (v8u16, v8u16); +v4u32 __builtin_msa_max_u_w (v4u32, v4u32); +v2u64 __builtin_msa_max_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_maxi_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_maxi_s_h (v8i16, imm_n16_15); +v4i32 
__builtin_msa_maxi_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_maxi_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_maxi_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_maxi_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_maxi_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_maxi_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_min_a_b (v16i8, v16i8); +v8i16 __builtin_msa_min_a_h (v8i16, v8i16); +v4i32 __builtin_msa_min_a_w (v4i32, v4i32); +v2i64 __builtin_msa_min_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_min_s_b (v16i8, v16i8); +v8i16 __builtin_msa_min_s_h (v8i16, v8i16); +v4i32 __builtin_msa_min_s_w (v4i32, v4i32); +v2i64 __builtin_msa_min_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_min_u_b (v16u8, v16u8); +v8u16 __builtin_msa_min_u_h (v8u16, v8u16); +v4u32 __builtin_msa_min_u_w (v4u32, v4u32); +v2u64 __builtin_msa_min_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_mini_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_mini_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_mini_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_mini_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_mini_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_mini_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_mini_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_mini_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_mod_s_b (v16i8, v16i8); +v8i16 __builtin_msa_mod_s_h (v8i16, v8i16); +v4i32 __builtin_msa_mod_s_w (v4i32, v4i32); +v2i64 __builtin_msa_mod_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_mod_u_b (v16u8, v16u8); +v8u16 __builtin_msa_mod_u_h (v8u16, v8u16); +v4u32 __builtin_msa_mod_u_w (v4u32, v4u32); +v2u64 __builtin_msa_mod_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_move_v (v16i8); + +v8i16 __builtin_msa_msub_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msub_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_msubr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_msubv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_msubv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_msubv_d (v2i64, v2i64, v2i64); + +v8i16 __builtin_msa_mul_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mul_q_w (v4i32, v4i32); + +v8i16 __builtin_msa_mulr_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mulr_q_w (v4i32, v4i32); + +v16i8 __builtin_msa_mulv_b (v16i8, v16i8); +v8i16 __builtin_msa_mulv_h (v8i16, v8i16); +v4i32 __builtin_msa_mulv_w (v4i32, v4i32); +v2i64 __builtin_msa_mulv_d (v2i64, v2i64); + +v16i8 __builtin_msa_nloc_b (v16i8); +v8i16 __builtin_msa_nloc_h (v8i16); +v4i32 __builtin_msa_nloc_w (v4i32); +v2i64 __builtin_msa_nloc_d (v2i64); + +v16i8 __builtin_msa_nlzc_b (v16i8); +v8i16 __builtin_msa_nlzc_h (v8i16); +v4i32 __builtin_msa_nlzc_w (v4i32); +v2i64 __builtin_msa_nlzc_d (v2i64); + +v16u8 __builtin_msa_nor_v (v16u8, v16u8); + +v16u8 __builtin_msa_nori_b (v16u8, imm0_255); + +v16u8 __builtin_msa_or_v (v16u8, v16u8); + +v16u8 __builtin_msa_ori_b (v16u8, imm0_255); + +v16i8 __builtin_msa_pckev_b (v16i8, v16i8); +v8i16 __builtin_msa_pckev_h (v8i16, v8i16); +v4i32 __builtin_msa_pckev_w (v4i32, v4i32); +v2i64 __builtin_msa_pckev_d (v2i64, v2i64); + +v16i8 __builtin_msa_pckod_b (v16i8, v16i8); +v8i16 __builtin_msa_pckod_h (v8i16, v8i16); +v4i32 __builtin_msa_pckod_w (v4i32, v4i32); +v2i64 __builtin_msa_pckod_d (v2i64, v2i64); + +v16i8 __builtin_msa_pcnt_b (v16i8); +v8i16 __builtin_msa_pcnt_h (v8i16); +v4i32 __builtin_msa_pcnt_w (v4i32); +v2i64 __builtin_msa_pcnt_d (v2i64); + +v16i8 __builtin_msa_sat_s_b (v16i8, imm0_7); +v8i16 __builtin_msa_sat_s_h (v8i16, imm0_15); +v4i32 __builtin_msa_sat_s_w 
(v4i32, imm0_31); +v2i64 __builtin_msa_sat_s_d (v2i64, imm0_63); + +v16u8 __builtin_msa_sat_u_b (v16u8, imm0_7); +v8u16 __builtin_msa_sat_u_h (v8u16, imm0_15); +v4u32 __builtin_msa_sat_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_sat_u_d (v2u64, imm0_63); + +v16i8 __builtin_msa_shf_b (v16i8, imm0_255); +v8i16 __builtin_msa_shf_h (v8i16, imm0_255); +v4i32 __builtin_msa_shf_w (v4i32, imm0_255); + +v16i8 __builtin_msa_sld_b (v16i8, v16i8, i32); +v8i16 __builtin_msa_sld_h (v8i16, v8i16, i32); +v4i32 __builtin_msa_sld_w (v4i32, v4i32, i32); +v2i64 __builtin_msa_sld_d (v2i64, v2i64, i32); + +v16i8 __builtin_msa_sldi_b (v16i8, v16i8, imm0_15); +v8i16 __builtin_msa_sldi_h (v8i16, v8i16, imm0_7); +v4i32 __builtin_msa_sldi_w (v4i32, v4i32, imm0_3); +v2i64 __builtin_msa_sldi_d (v2i64, v2i64, imm0_1); + +v16i8 __builtin_msa_sll_b (v16i8, v16i8); +v8i16 __builtin_msa_sll_h (v8i16, v8i16); +v4i32 __builtin_msa_sll_w (v4i32, v4i32); +v2i64 __builtin_msa_sll_d (v2i64, v2i64); + +v16i8 __builtin_msa_slli_b (v16i8, imm0_7); +v8i16 __builtin_msa_slli_h (v8i16, imm0_15); +v4i32 __builtin_msa_slli_w (v4i32, imm0_31); +v2i64 __builtin_msa_slli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_splat_b (v16i8, i32); +v8i16 __builtin_msa_splat_h (v8i16, i32); +v4i32 __builtin_msa_splat_w (v4i32, i32); +v2i64 __builtin_msa_splat_d (v2i64, i32); + +v16i8 __builtin_msa_splati_b (v16i8, imm0_15); +v8i16 __builtin_msa_splati_h (v8i16, imm0_7); +v4i32 __builtin_msa_splati_w (v4i32, imm0_3); +v2i64 __builtin_msa_splati_d (v2i64, imm0_1); + +v16i8 __builtin_msa_sra_b (v16i8, v16i8); +v8i16 __builtin_msa_sra_h (v8i16, v8i16); +v4i32 __builtin_msa_sra_w (v4i32, v4i32); +v2i64 __builtin_msa_sra_d (v2i64, v2i64); + +v16i8 __builtin_msa_srai_b (v16i8, imm0_7); +v8i16 __builtin_msa_srai_h (v8i16, imm0_15); +v4i32 __builtin_msa_srai_w (v4i32, imm0_31); +v2i64 __builtin_msa_srai_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srar_b (v16i8, v16i8); +v8i16 __builtin_msa_srar_h (v8i16, v8i16); +v4i32 __builtin_msa_srar_w (v4i32, v4i32); +v2i64 __builtin_msa_srar_d (v2i64, v2i64); + +v16i8 __builtin_msa_srari_b (v16i8, imm0_7); +v8i16 __builtin_msa_srari_h (v8i16, imm0_15); +v4i32 __builtin_msa_srari_w (v4i32, imm0_31); +v2i64 __builtin_msa_srari_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srl_b (v16i8, v16i8); +v8i16 __builtin_msa_srl_h (v8i16, v8i16); +v4i32 __builtin_msa_srl_w (v4i32, v4i32); +v2i64 __builtin_msa_srl_d (v2i64, v2i64); + +v16i8 __builtin_msa_srli_b (v16i8, imm0_7); +v8i16 __builtin_msa_srli_h (v8i16, imm0_15); +v4i32 __builtin_msa_srli_w (v4i32, imm0_31); +v2i64 __builtin_msa_srli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srlr_b (v16i8, v16i8); +v8i16 __builtin_msa_srlr_h (v8i16, v8i16); +v4i32 __builtin_msa_srlr_w (v4i32, v4i32); +v2i64 __builtin_msa_srlr_d (v2i64, v2i64); + +v16i8 __builtin_msa_srlri_b (v16i8, imm0_7); +v8i16 __builtin_msa_srlri_h (v8i16, imm0_15); +v4i32 __builtin_msa_srlri_w (v4i32, imm0_31); +v2i64 __builtin_msa_srlri_d (v2i64, imm0_63); + +void __builtin_msa_st_b (v16i8, void *, imm_n512_511); +void __builtin_msa_st_h (v8i16, void *, imm_n1024_1022); +void __builtin_msa_st_w (v4i32, void *, imm_n2048_2044); +void __builtin_msa_st_d (v2i64, void *, imm_n4096_4088); + +v16i8 __builtin_msa_subs_s_b (v16i8, v16i8); +v8i16 __builtin_msa_subs_s_h (v8i16, v8i16); +v4i32 __builtin_msa_subs_s_w (v4i32, v4i32); +v2i64 __builtin_msa_subs_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_subs_u_b (v16u8, v16u8); +v8u16 __builtin_msa_subs_u_h (v8u16, v8u16); +v4u32 __builtin_msa_subs_u_w (v4u32, v4u32); +v2u64 
__builtin_msa_subs_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_subsus_u_b (v16u8, v16i8); +v8u16 __builtin_msa_subsus_u_h (v8u16, v8i16); +v4u32 __builtin_msa_subsus_u_w (v4u32, v4i32); +v2u64 __builtin_msa_subsus_u_d (v2u64, v2i64); + +v16i8 __builtin_msa_subsuu_s_b (v16u8, v16u8); +v8i16 __builtin_msa_subsuu_s_h (v8u16, v8u16); +v4i32 __builtin_msa_subsuu_s_w (v4u32, v4u32); +v2i64 __builtin_msa_subsuu_s_d (v2u64, v2u64); + +v16i8 __builtin_msa_subv_b (v16i8, v16i8); +v8i16 __builtin_msa_subv_h (v8i16, v8i16); +v4i32 __builtin_msa_subv_w (v4i32, v4i32); +v2i64 __builtin_msa_subv_d (v2i64, v2i64); + +v16i8 __builtin_msa_subvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_subvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_subvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_subvi_d (v2i64, imm0_31); + +v16i8 __builtin_msa_vshf_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_vshf_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_vshf_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_vshf_d (v2i64, v2i64, v2i64); + +v16u8 __builtin_msa_xor_v (v16u8, v16u8); + +v16u8 __builtin_msa_xori_b (v16u8, imm0_255); diff --git a/library/stdarch/crates/stdarch-verify/src/lib.rs b/library/stdarch/crates/stdarch-verify/src/lib.rs new file mode 100644 index 00000000000..cbe0530970b --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/src/lib.rs @@ -0,0 +1,387 @@ +extern crate proc_macro; +extern crate proc_macro2; +#[macro_use] +extern crate quote; +#[macro_use] +extern crate syn; + +use proc_macro::TokenStream; +use proc_macro2::Span; +use std::{fs::File, io::Read, path::Path}; +use syn::ext::IdentExt; + +#[proc_macro] +pub fn x86_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/x86", "core_arch/src/x86_64"]) +} + +#[proc_macro] +pub fn arm_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/arm", "core_arch/src/aarch64"]) +} + +#[proc_macro] +pub fn mips_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/mips"]) +} + +fn functions(input: TokenStream, dirs: &[&str]) -> TokenStream { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let root = dir.parent().expect("root-dir not found"); + + let mut files = Vec::new(); + for dir in dirs { + walk(&root.join(dir), &mut files); + } + assert!(!files.is_empty()); + + let mut functions = Vec::new(); + for &mut (ref mut file, ref path) in &mut files { + for item in file.items.drain(..) { + if let syn::Item::Fn(f) = item { + functions.push((f, path)) + } + } + } + assert!(!functions.is_empty()); + + functions.retain(|&(ref f, _)| { + if let syn::Visibility::Public(_) = f.vis { + if f.unsafety.is_some() { + return true; + } + } + false + }); + assert!(!functions.is_empty()); + + let input = proc_macro2::TokenStream::from(input); + + let functions = functions + .iter() + .map(|&(ref f, path)| { + let name = &f.ident; + // println!("{}", name); + let mut arguments = Vec::new(); + for input in f.decl.inputs.iter() { + let ty = match *input { + syn::FnArg::Captured(ref c) => &c.ty, + _ => panic!("invalid argument on {}", name), + }; + arguments.push(to_type(ty)); + } + let ret = match f.decl.output { + syn::ReturnType::Default => quote! { None }, + syn::ReturnType::Type(_, ref t) => { + let ty = to_type(t); + quote! { Some(#ty) } + } + }; + let instrs = find_instrs(&f.attrs); + let target_feature = if let Some(i) = find_target_feature(&f.attrs) { + quote! { Some(#i) } + } else { + quote! { None } + }; + let required_const = find_required_const(&f.attrs); + quote! 
{ + Function { + name: stringify!(#name), + arguments: &[#(#arguments),*], + ret: #ret, + target_feature: #target_feature, + instrs: &[#(#instrs),*], + file: stringify!(#path), + required_const: &[#(#required_const),*], + } + } + }) + .collect::>(); + + let ret = quote! { #input: &[Function] = &[#(#functions),*]; }; + // println!("{}", ret); + ret.into() +} + +fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { + match *t { + syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() { + // x86 ... + "__m128" => quote! { &M128 }, + "__m128d" => quote! { &M128D }, + "__m128i" => quote! { &M128I }, + "__m256" => quote! { &M256 }, + "__m256d" => quote! { &M256D }, + "__m256i" => quote! { &M256I }, + "__m512" => quote! { &M512 }, + "__m512d" => quote! { &M512D }, + "__m512i" => quote! { &M512I }, + "__mmask16" => quote! { &MMASK16 }, + "__m64" => quote! { &M64 }, + "bool" => quote! { &BOOL }, + "f32" => quote! { &F32 }, + "f64" => quote! { &F64 }, + "i16" => quote! { &I16 }, + "i32" => quote! { &I32 }, + "i64" => quote! { &I64 }, + "i8" => quote! { &I8 }, + "u16" => quote! { &U16 }, + "u32" => quote! { &U32 }, + "u64" => quote! { &U64 }, + "u128" => quote! { &U128 }, + "u8" => quote! { &U8 }, + "Ordering" => quote! { &ORDERING }, + "CpuidResult" => quote! { &CPUID }, + + // arm ... + "int8x4_t" => quote! { &I8X4 }, + "int8x8_t" => quote! { &I8X8 }, + "int8x8x2_t" => quote! { &I8X8X2 }, + "int8x8x3_t" => quote! { &I8X8X3 }, + "int8x8x4_t" => quote! { &I8X8X4 }, + "int8x16x2_t" => quote! { &I8X16X2 }, + "int8x16x3_t" => quote! { &I8X16X3 }, + "int8x16x4_t" => quote! { &I8X16X4 }, + "int8x16_t" => quote! { &I8X16 }, + "int16x2_t" => quote! { &I16X2 }, + "int16x4_t" => quote! { &I16X4 }, + "int16x8_t" => quote! { &I16X8 }, + "int32x2_t" => quote! { &I32X2 }, + "int32x4_t" => quote! { &I32X4 }, + "int64x1_t" => quote! { &I64X1 }, + "int64x2_t" => quote! { &I64X2 }, + "uint8x8_t" => quote! { &U8X8 }, + "uint8x8x2_t" => quote! { &U8X8X2 }, + "uint8x16x2_t" => quote! { &U8X16X2 }, + "uint8x16x3_t" => quote! { &U8X16X3 }, + "uint8x16x4_t" => quote! { &U8X16X4 }, + "uint8x8x3_t" => quote! { &U8X8X3 }, + "uint8x8x4_t" => quote! { &U8X8X4 }, + "uint8x16_t" => quote! { &U8X16 }, + "uint16x4_t" => quote! { &U16X4 }, + "uint16x8_t" => quote! { &U16X8 }, + "uint32x2_t" => quote! { &U32X2 }, + "uint32x4_t" => quote! { &U32X4 }, + "uint64x1_t" => quote! { &U64X1 }, + "uint64x2_t" => quote! { &U64X2 }, + "float32x2_t" => quote! { &F32X2 }, + "float32x4_t" => quote! { &F32X4 }, + "float64x1_t" => quote! { &F64X1 }, + "float64x2_t" => quote! { &F64X2 }, + "poly8x8_t" => quote! { &POLY8X8 }, + "poly8x8x2_t" => quote! { &POLY8X8X2 }, + "poly8x8x3_t" => quote! { &POLY8X8X3 }, + "poly8x8x4_t" => quote! { &POLY8X8X4 }, + "poly8x16x2_t" => quote! { &POLY8X16X2 }, + "poly8x16x3_t" => quote! { &POLY8X16X3 }, + "poly8x16x4_t" => quote! { &POLY8X16X4 }, + "poly64x1_t" => quote! { &POLY64X1 }, + "poly64x2_t" => quote! { &POLY64X2 }, + "poly8x16_t" => quote! { &POLY8X16 }, + "poly16x4_t" => quote! { &POLY16X4 }, + "poly16x8_t" => quote! { &POLY16X8 }, + + "v16i8" => quote! { &v16i8 }, + "v8i16" => quote! { &v8i16 }, + "v4i32" => quote! { &v4i32 }, + "v2i64" => quote! { &v2i64 }, + "v16u8" => quote! { &v16u8 }, + "v8u16" => quote! { &v8u16 }, + "v4u32" => quote! { &v4u32 }, + "v2u64" => quote! { &v2u64 }, + "v8f16" => quote! { &v8f16 }, + "v4f32" => quote! { &v4f32 }, + "v2f64" => quote! 
{ &v2f64 }, + + s => panic!("unspported type: \"{}\"", s), + }, + syn::Type::Ptr(syn::TypePtr { + ref elem, + ref mutability, + .. + }) + | syn::Type::Reference(syn::TypeReference { + ref elem, + ref mutability, + .. + }) => { + // Both pointers and references can have a mut token (*mut and &mut) + if mutability.is_some() { + let tokens = to_type(&elem); + quote! { &Type::MutPtr(#tokens) } + } else { + // If they don't (*const or &) then they are "const" + let tokens = to_type(&elem); + quote! { &Type::ConstPtr(#tokens) } + } + } + + syn::Type::Slice(_) => panic!("unsupported slice"), + syn::Type::Array(_) => panic!("unsupported array"), + syn::Type::Tuple(_) => quote! { &TUPLE }, + syn::Type::Never(_) => quote! { &NEVER }, + _ => panic!("unsupported type"), + } +} + +fn extract_path_ident(path: &syn::Path) -> syn::Ident { + if path.leading_colon.is_some() { + panic!("unsupported leading colon in path") + } + if path.segments.len() != 1 { + panic!("unsupported path that needs name resolution") + } + match path + .segments + .first() + .expect("segment not found") + .value() + .arguments + { + syn::PathArguments::None => {} + _ => panic!("unsupported path that has path arguments"), + } + path.segments + .first() + .expect("segment not found") + .value() + .ident + .clone() +} + +fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) { + for file in root.read_dir().unwrap() { + let file = file.unwrap(); + if file.file_type().unwrap().is_dir() { + walk(&file.path(), files); + continue; + } + let path = file.path(); + if path.extension().and_then(std::ffi::OsStr::to_str) != Some("rs") { + continue; + } + + if path.file_name().and_then(std::ffi::OsStr::to_str) == Some("test.rs") { + continue; + } + + let mut contents = String::new(); + File::open(&path) + .unwrap_or_else(|_| panic!("can't open file at path: {}", path.display())) + .read_to_string(&mut contents) + .expect("failed to read file to string"); + + files.push(( + syn::parse_str::(&contents).expect("failed to parse"), + path.display().to_string(), + )); + } +} + +fn find_instrs(attrs: &[syn::Attribute]) -> Vec { + struct AssertInstr { + instr: String, + } + + // A small custom parser to parse out the instruction in `assert_instr`. + // + // TODO: should probably just reuse `Invoc` from the `assert-instr-macro` + // crate. 
+ impl syn::parse::Parse for AssertInstr { + fn parse(content: syn::parse::ParseStream) -> syn::parse::Result { + let input; + parenthesized!(input in content); + let _ = input.parse::()?; + let _ = input.parse::()?; + let ident = input.parse::()?; + if ident != "assert_instr" { + return Err(input.error("expected `assert_instr`")); + } + let instrs; + parenthesized!(instrs in input); + + let mut instr = String::new(); + while !instrs.is_empty() { + if let Ok(lit) = instrs.parse::() { + instr.push_str(&lit.value()); + } else if let Ok(ident) = instrs.call(syn::Ident::parse_any) { + instr.push_str(&ident.to_string()); + } else if instrs.parse::().is_ok() { + instr.push_str("."); + } else if instrs.parse::().is_ok() { + // consume everything remaining + drop(instrs.parse::()); + break; + } else { + return Err(input.error("failed to parse instruction")); + } + } + Ok(Self { instr }) + } + } + + attrs + .iter() + .filter(|a| a.path == syn::Ident::new("cfg_attr", Span::call_site()).into()) + .filter_map(|a| { + syn::parse2::(a.tts.clone()) + .ok() + .map(|a| a.instr) + }) + .collect() +} + +fn find_target_feature(attrs: &[syn::Attribute]) -> Option { + attrs + .iter() + .flat_map(|a| { + if let Some(a) = a.interpret_meta() { + if let syn::Meta::List(i) = a { + if i.ident == "target_feature" { + return i.nested; + } + } + } + syn::punctuated::Punctuated::new() + }) + .filter_map(|nested| match nested { + syn::NestedMeta::Meta(m) => Some(m), + syn::NestedMeta::Literal(_) => None, + }) + .find_map(|m| match m { + syn::Meta::NameValue(ref i) if i.ident == "enable" => Some(i.clone().lit), + _ => None, + }) +} + +fn find_required_const(attrs: &[syn::Attribute]) -> Vec { + attrs + .iter() + .flat_map(|a| { + if a.path.segments[0].ident == "rustc_args_required_const" { + syn::parse::(a.tts.clone().into()) + .unwrap() + .args + } else { + Vec::new() + } + }) + .collect() +} + +struct RustcArgsRequiredConst { + args: Vec, +} + +impl syn::parse::Parse for RustcArgsRequiredConst { + #[allow(clippy::cast_possible_truncation)] + fn parse(input: syn::parse::ParseStream) -> syn::parse::Result { + let content; + parenthesized!(content in input); + let list = + syn::punctuated::Punctuated::::parse_terminated(&content)?; + Ok(Self { + args: list.into_iter().map(|a| a.value() as usize).collect(), + }) + } +} diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs new file mode 100644 index 00000000000..6f92933bc73 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs @@ -0,0 +1,592 @@ +#![allow(bad_style)] + +use std::{collections::HashMap, rc::Rc}; + +use html5ever::{ + driver::ParseOpts, + parse_document, + rcdom::{Node, NodeData, RcDom}, + tendril::TendrilSink, + tree_builder::TreeBuilderOpts, +}; + +struct Function { + name: &'static str, + arguments: &'static [&'static Type], + ret: Option<&'static Type>, + target_feature: Option<&'static str>, + instrs: &'static [&'static str], + file: &'static str, + required_const: &'static [usize], +} + +static F16: Type = Type::PrimFloat(16); +static F32: Type = Type::PrimFloat(32); +static F64: Type = Type::PrimFloat(64); +static I16: Type = Type::PrimSigned(16); +static I32: Type = Type::PrimSigned(32); +static I64: Type = Type::PrimSigned(64); +static I8: Type = Type::PrimSigned(8); +static U16: Type = Type::PrimUnsigned(16); +static U32: Type = Type::PrimUnsigned(32); +static U64: Type = Type::PrimUnsigned(64); +static U8: Type = Type::PrimUnsigned(8); +static NEVER: Type = Type::Never; + 
+static F16X4: Type = Type::F(16, 4, 1); +static F16X4X2: Type = Type::F(16, 4, 2); +static F16X4X3: Type = Type::F(16, 4, 3); +static F16X4X4: Type = Type::F(16, 4, 4); +static F16X8: Type = Type::F(16, 8, 1); +static F16X8X2: Type = Type::F(16, 8, 2); +static F16X8X3: Type = Type::F(16, 8, 3); +static F16X8X4: Type = Type::F(16, 8, 4); +static F32X2: Type = Type::F(32, 2, 1); +static F32X2X2: Type = Type::F(32, 2, 2); +static F32X2X3: Type = Type::F(32, 2, 3); +static F32X2X4: Type = Type::F(32, 2, 4); +static F32X4: Type = Type::F(32, 4, 1); +static F32X4X2: Type = Type::F(32, 4, 2); +static F32X4X3: Type = Type::F(32, 4, 3); +static F32X4X4: Type = Type::F(32, 4, 4); +static F64X1: Type = Type::F(64, 1, 1); +static F64X1X2: Type = Type::F(64, 1, 2); +static F64X1X3: Type = Type::F(64, 1, 3); +static F64X1X4: Type = Type::F(64, 1, 4); +static F64X2: Type = Type::F(64, 2, 1); +static F64X2X2: Type = Type::F(64, 2, 2); +static F64X2X3: Type = Type::F(64, 2, 3); +static F64X2X4: Type = Type::F(64, 2, 4); +static I16X2: Type = Type::I(16, 2, 1); +static I16X4: Type = Type::I(16, 4, 1); +static I16X4X2: Type = Type::I(16, 4, 2); +static I16X4X3: Type = Type::I(16, 4, 3); +static I16X4X4: Type = Type::I(16, 4, 4); +static I16X8: Type = Type::I(16, 8, 1); +static I16X8X2: Type = Type::I(16, 8, 2); +static I16X8X3: Type = Type::I(16, 8, 3); +static I16X8X4: Type = Type::I(16, 8, 4); +static I32X2: Type = Type::I(32, 2, 1); +static I32X2X2: Type = Type::I(32, 2, 2); +static I32X2X3: Type = Type::I(32, 2, 3); +static I32X2X4: Type = Type::I(32, 2, 4); +static I32X4: Type = Type::I(32, 4, 1); +static I32X4X2: Type = Type::I(32, 4, 2); +static I32X4X3: Type = Type::I(32, 4, 3); +static I32X4X4: Type = Type::I(32, 4, 4); +static I64X1: Type = Type::I(64, 1, 1); +static I64X1X2: Type = Type::I(64, 1, 2); +static I64X1X3: Type = Type::I(64, 1, 3); +static I64X1X4: Type = Type::I(64, 1, 4); +static I64X2: Type = Type::I(64, 2, 1); +static I64X2X2: Type = Type::I(64, 2, 2); +static I64X2X3: Type = Type::I(64, 2, 3); +static I64X2X4: Type = Type::I(64, 2, 4); +static I8X16: Type = Type::I(8, 16, 1); +static I8X16X2: Type = Type::I(8, 16, 2); +static I8X16X3: Type = Type::I(8, 16, 3); +static I8X16X4: Type = Type::I(8, 16, 4); +static I8X4: Type = Type::I(8, 4, 1); +static I8X8: Type = Type::I(8, 8, 1); +static I8X8X2: Type = Type::I(8, 8, 2); +static I8X8X3: Type = Type::I(8, 8, 3); +static I8X8X4: Type = Type::I(8, 8, 4); +static P128: Type = Type::PrimPoly(128); +static P16: Type = Type::PrimPoly(16); +static P16X4X2: Type = Type::P(16, 4, 2); +static P16X4X3: Type = Type::P(16, 4, 3); +static P16X4X4: Type = Type::P(16, 4, 4); +static P16X8X2: Type = Type::P(16, 8, 2); +static P16X8X3: Type = Type::P(16, 8, 3); +static P16X8X4: Type = Type::P(16, 8, 4); +static P64: Type = Type::PrimPoly(64); +static P64X1X2: Type = Type::P(64, 1, 2); +static P64X1X3: Type = Type::P(64, 1, 3); +static P64X1X4: Type = Type::P(64, 1, 4); +static P64X2X2: Type = Type::P(64, 2, 2); +static P64X2X3: Type = Type::P(64, 2, 3); +static P64X2X4: Type = Type::P(64, 2, 4); +static P8: Type = Type::PrimPoly(8); +static POLY16X4: Type = Type::P(16, 4, 1); +static POLY16X8: Type = Type::P(16, 8, 1); +static POLY64X1: Type = Type::P(64, 1, 1); +static POLY64X2: Type = Type::P(64, 2, 1); +static POLY8X16: Type = Type::P(8, 16, 1); +static POLY8X16X2: Type = Type::P(8, 16, 2); +static POLY8X16X3: Type = Type::P(8, 16, 3); +static POLY8X16X4: Type = Type::P(8, 16, 4); +static POLY8X8: Type = Type::P(8, 8, 1); +static POLY8X8X2: Type = 
Type::P(8, 8, 2); +static POLY8X8X3: Type = Type::P(8, 8, 3); +static POLY8X8X4: Type = Type::P(8, 8, 4); +static U16X4: Type = Type::U(16, 4, 1); +static U16X4X2: Type = Type::U(16, 4, 2); +static U16X4X3: Type = Type::U(16, 4, 3); +static U16X4X4: Type = Type::U(16, 4, 4); +static U16X8: Type = Type::U(16, 8, 1); +static U16X8X2: Type = Type::U(16, 8, 2); +static U16X8X3: Type = Type::U(16, 8, 3); +static U16X8X4: Type = Type::U(16, 8, 4); +static U32X2: Type = Type::U(32, 2, 1); +static U32X2X2: Type = Type::U(32, 2, 2); +static U32X2X3: Type = Type::U(32, 2, 3); +static U32X2X4: Type = Type::U(32, 2, 4); +static U32X4: Type = Type::U(32, 4, 1); +static U32X4X2: Type = Type::U(32, 4, 2); +static U32X4X3: Type = Type::U(32, 4, 3); +static U32X4X4: Type = Type::U(32, 4, 4); +static U64X1: Type = Type::U(64, 1, 1); +static U64X1X2: Type = Type::U(64, 1, 2); +static U64X1X3: Type = Type::U(64, 1, 3); +static U64X1X4: Type = Type::U(64, 1, 4); +static U64X2: Type = Type::U(64, 2, 1); +static U64X2X2: Type = Type::U(64, 2, 2); +static U64X2X3: Type = Type::U(64, 2, 3); +static U64X2X4: Type = Type::U(64, 2, 4); +static U8X16: Type = Type::U(8, 16, 1); +static U8X16X2: Type = Type::U(8, 16, 2); +static U8X16X3: Type = Type::U(8, 16, 3); +static U8X16X4: Type = Type::U(8, 16, 4); +static U8X8: Type = Type::U(8, 8, 1); +static U8X8X2: Type = Type::U(8, 8, 2); +static U8X8X3: Type = Type::U(8, 8, 3); +static U8X8X4: Type = Type::U(8, 8, 4); + +#[derive(Debug, Copy, Clone, PartialEq)] +enum Type { + PrimFloat(u8), + PrimSigned(u8), + PrimUnsigned(u8), + PrimPoly(u8), + MutPtr(&'static Type), + ConstPtr(&'static Type), + I(u8, u8, u8), + U(u8, u8, u8), + P(u8, u8, u8), + F(u8, u8, u8), + Never, +} + +stdarch_verify::arm_functions!(static FUNCTIONS); + +macro_rules! bail { + ($($t:tt)*) => (return Err(format!($($t)*))) +} + +#[test] +fn verify_all_signatures() { + // This is a giant HTML blob downloaded from + // https://developer.arm.com/technologies/neon/intrinsics which contains all + // NEON intrinsics at least. We do manual HTML parsing below. + let html = include_bytes!("../arm-intrinsics.html"); + let mut html = &html[..]; + let opts = ParseOpts { + tree_builder: TreeBuilderOpts { + drop_doctype: true, + ..Default::default() + }, + ..Default::default() + }; + let dom = parse_document(RcDom::default(), opts) + .from_utf8() + .read_from(&mut html) + .unwrap(); + + let accordion = find_accordion(&dom.document).unwrap(); + let map = parse_intrinsics(&accordion); + + let mut all_valid = true; + 'outer: for rust in FUNCTIONS { + // Skip some intrinsics that aren't NEON and are located in different + // places than the whitelists below. + match rust.name { + "brk" | "__breakpoint" | "udf" => continue, + _ => {} + } + let arm = match map.get(rust.name) { + Some(i) => i, + None => { + // Skip all these intrinsics as they're not listed in NEON + // descriptions online. + // + // TODO: we still need to verify these intrinsics or find a + // reference for them, need to figure out where though! 
+ if !rust.file.ends_with("dsp.rs\"") + && !rust.file.ends_with("cmsis.rs\"") + && !rust.file.ends_with("v6.rs\"") + && !rust.file.ends_with("v7.rs\"") + && !rust.file.ends_with("v8.rs\"") + { + println!( + "missing arm definition for {:?} in {}", + rust.name, rust.file + ); + all_valid = false; + } + continue; + } + }; + + if let Err(e) = matches(rust, arm) { + println!("failed to verify `{}`", rust.name); + println!(" * {}", e); + all_valid = false; + } + } + assert!(all_valid); +} + +fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> { + if rust.ret != arm.ret.as_ref() { + bail!("mismatched return value") + } + if rust.arguments.len() != arm.arguments.len() { + bail!("mismatched argument lengths"); + } + + let mut nconst = 0; + let iter = rust.arguments.iter().zip(&arm.arguments).enumerate(); + for (i, (rust_ty, (arm, arm_const))) in iter { + if *rust_ty != arm { + bail!("mismatched arguments") + } + if *arm_const { + nconst += 1; + if !rust.required_const.contains(&i) { + bail!("argument const mismatch"); + } + } + } + if nconst != rust.required_const.len() { + bail!("wrong number of const arguments"); + } + + if rust.instrs.is_empty() { + bail!( + "instruction not listed for `{}`, but arm lists {:?}", + rust.name, + arm.instruction + ); + } else if false + /* not super reliable, but can be used to manually check */ + { + for instr in rust.instrs { + if arm.instruction.starts_with(instr) { + continue; + } + // sometimes arm says `foo` and disassemblers say `vfoo`, or + // sometimes disassemblers say `vfoo` and arm says `sfoo` or `ffoo` + if instr.starts_with("v") + && (arm.instruction.starts_with(&instr[1..]) + || arm.instruction[1..].starts_with(&instr[1..])) + { + continue; + } + bail!( + "arm failed to list `{}` as an instruction for `{}` in {:?}", + instr, + rust.name, + arm.instruction, + ); + } + } + + // TODO: verify `target_feature`. + + Ok(()) +} + +fn find_accordion(node: &Rc) -> Option> { + if let NodeData::Element { attrs, .. } = &node.data { + for attr in attrs.borrow().iter() { + if attr.name.local.eq_str_ignore_ascii_case("class") { + if attr.value.to_string() == "intrinsic-accordion" { + return Some(node.clone()); + } + } + } + } + + node.children + .borrow() + .iter() + .filter_map(|node| find_accordion(node)) + .next() +} + +#[derive(PartialEq)] +struct Intrinsic { + name: String, + ret: Option, + arguments: Vec<(Type, bool)>, + instruction: String, +} + +fn parse_intrinsics(node: &Rc) -> HashMap { + let mut ret = HashMap::new(); + for child in node.children.borrow().iter() { + if let NodeData::Element { .. } = child.data { + let f = parse_intrinsic(child); + ret.insert(f.name.clone(), f); + } + } + return ret; +} + +fn parse_intrinsic(node: &Rc) -> Intrinsic { + //
+ // ... + //